StartDate: 2022-03-07 11:38:51+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: e47857cd4049e60bcaad34007397a0fdf39da8bc CommitTime: 2022-03-07 11:28:59 +0100 CommitAuthor: Frederick Stein CommitSubject: Fix dashboard (#1980) Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=e47857cd4049e60bcaad34007397a0fdf39da8bc Sending build context to Docker daemon 362.3MB Step 1/41 : FROM ubuntu:20.04 20.04: Pulling from library/ubuntu 7c3b88808835: Already exists Digest: sha256:8ae9bafbb64f63a50caab98fd3a5e37b3eb837a3e0780b78e5218e63193961f9 Status: Downloaded newer image for ubuntu:20.04 ---> 2b4cba85892a Step 2/41 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 2478d1e60fc2 Step 3/41 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> f04a754c1b4e Step 4/41 : RUN ./install_requirements.sh ubuntu:20.04 ---> Using cache ---> b7c4b3bf2ba3 Step 5/41 : RUN mkdir scripts ---> Using cache ---> 0be4797ba2b9 Step 6/41 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 20eb19422415 Step 7/41 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> bf33731cd10d Step 8/41 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --dry-run ---> Using cache ---> 19b0b3c97393 Step 9/41 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 9fcf7ef497eb Step 10/41 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 5196fa37023c Step 11/41 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> dd293eb974d0 Step 12/41 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> e76392d58f21 Step 13/41 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> a0c151ba1669 Step 14/41 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 89cd0e6c3bff Step 15/41 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 3f34682c7ad5 Step 16/41 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> f53b7bbcf5ed Step 17/41 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 6059648341ff Step 18/41 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> a38ee222da95 Step 19/41 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 033717c35479 Step 20/41 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> ce105c9308f4 Step 21/41 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> da58aaf5626a Step 22/41 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 88850f0a7d75 Step 23/41 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 6180e9da3590 Step 24/41 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> b03deb66bd99 Step 25/41 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> ffaf2be6a82f Step 26/41 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 0044bd1612fd Step 27/41 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 5b158606bcfe Step 28/41 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> a96d7d4752c8 Step 29/41 : WORKDIR /workspace/cp2k ---> Using cache ---> b5782b82b813 Step 30/41 : COPY ./Makefile . ---> Using cache ---> 1b8dc8633e7e Step 31/41 : COPY ./src ./src ---> Using cache ---> a42238f0f99c Step 32/41 : COPY ./exts ./exts ---> Using cache ---> fc66f347cf1d Step 33/41 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> 61a9bfffbe3f Step 34/41 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true )" ---> Running in 4a1e84babee2 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container 4a1e84babee2 ---> d41ce143fc2c Step 35/41 : COPY ./data ./data ---> bf01ddac2c5c Step 36/41 : COPY ./tests ./tests ---> 806a23ccf25d Step 37/41 : COPY ./tools/regtesting ./tools/regtesting ---> a38e9faeb3cc Step 38/41 : COPY ./benchmarks ./benchmarks ---> 246de88321d3 Step 39/41 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 965ddac7f83d Step 40/41 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 44340e0ffe5c ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.037 0.037 163.136 163.136 qs_mol_dyn_low 1 2.0 0.004 0.004 162.306 162.306 qs_forces 11 3.9 0.002 0.002 162.246 162.246 qs_energies 11 4.9 0.001 0.001 151.675 151.675 scf_env_do_scf 11 5.9 0.001 0.001 119.511 119.511 velocity_verlet 10 3.0 0.002 0.002 114.683 114.683 scf_env_do_scf_inner_loop 108 6.5 0.010 0.010 80.191 80.191 init_scf_loop 11 6.9 0.000 0.000 39.104 39.104 prepare_preconditioner 11 7.9 0.000 0.000 35.119 35.119 make_preconditioner 11 8.9 0.000 0.000 35.119 35.119 make_full_inverse_cholesky 11 9.9 0.000 0.000 33.159 33.159 rebuild_ks_matrix 119 8.3 0.001 0.001 32.434 32.434 qs_ks_build_kohn_sham_matrix 119 9.3 0.019 0.019 32.433 32.433 qs_ks_update_qs_env 119 7.6 0.001 0.001 30.167 30.167 qs_rho_update_rho 119 7.7 0.001 0.001 28.446 28.446 calculate_rho_elec 119 8.7 1.546 1.546 28.445 28.445 qs_scf_new_mos 108 7.5 0.001 0.001 27.605 27.605 qs_scf_loop_do_ot 108 8.5 0.001 0.001 27.604 27.604 ot_scf_mini 108 9.5 0.003 0.003 25.651 25.651 dbcsr_multiply_generic 2286 12.5 0.190 0.190 23.540 23.540 grid_collocate_task_list 119 9.7 22.440 22.440 22.440 22.440 sum_up_and_integrate 119 10.3 0.377 0.377 20.526 20.526 integrate_v_rspace 119 11.3 0.585 0.585 20.148 20.148 cp_fm_cholesky_invert 11 10.9 20.036 20.036 20.036 20.036 grid_integrate_task_list 119 12.3 17.105 17.105 17.105 17.105 init_scf_run 11 5.9 0.001 0.001 16.548 16.548 scf_env_initial_rho_setup 11 6.9 0.001 0.001 16.547 16.547 wfi_extrapolate 11 7.9 0.001 0.001 15.726 15.726 ot_mini 108 10.5 0.001 0.001 15.232 15.232 cp_gemm 81 9.0 0.000 0.000 15.209 15.209 cp_gemm_cosma 81 10.0 15.208 15.208 15.208 15.208 make_m2s 4572 13.5 0.066 0.066 13.055 13.055 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.500 10.500 qs_ot_get_derivative 108 11.5 0.002 0.002 7.944 7.944 pw_transfer 1439 11.6 0.094 0.094 7.619 7.619 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 7.319 7.319 ot_diis_step 108 11.5 0.006 0.006 7.283 7.283 make_images 4572 14.5 2.632 2.632 7.021 7.021 cp_fm_cholesky_decompose 22 10.9 6.878 6.878 6.878 6.878 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.500 6.500 dbcsr_complete_redistribute 329 12.2 3.033 3.033 6.430 6.430 qs_ot_get_p 119 10.4 0.001 0.001 6.419 6.419 dbcsr_make_dense_low 5837 15.5 0.098 0.098 6.211 6.211 fft_wrap_pw1pw2_140 487 13.2 0.638 0.638 6.196 6.196 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.156 6.156 apply_single 119 13.6 0.001 0.001 6.156 6.156 make_dense_data 5837 16.5 5.460 5.460 6.090 6.090 multiply_cannon 2286 13.5 1.049 1.049 5.881 5.881 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.817 5.817 dbcsr_make_images_dense 3978 14.8 0.027 0.027 5.577 5.577 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.289 5.289 qs_create_task_list 11 7.9 0.000 0.000 5.283 5.283 generate_qs_task_list 11 8.9 3.645 3.645 5.283 5.283 dbcsr_copy 2102 12.0 0.286 0.286 5.266 5.266 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.950 4.950 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.950 4.950 dbcsr_copy_into_existing 22 7.9 4.933 4.933 4.934 4.934 qs_ot_p2m_diag 50 11.0 0.213 0.213 4.843 4.843 pw_poisson_solve 119 10.3 1.854 1.854 4.489 4.489 density_rs2pw 119 9.7 0.007 0.007 4.458 4.458 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.368 4.368 cp_dbcsr_syevd 50 12.0 0.004 0.004 4.296 4.296 multiply_cannon_loop 2286 14.5 0.051 0.051 4.270 4.270 multiply_cannon_multrec 2286 15.5 4.149 4.149 4.218 4.218 cp_fm_diag_elpa 50 13.0 0.000 0.000 4.139 4.139 cp_fm_diag_elpa_base 50 14.0 4.082 4.082 4.138 4.138 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.083 4.083 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.068 4.068 fft3d_s 1202 14.6 3.328 3.328 3.335 3.335 copy_fm_to_dbcsr 176 11.2 0.002 0.002 3.328 3.328 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.018 72.208 72.209 qs_mol_dyn_low 1 2.0 0.007 0.008 72.067 72.073 qs_forces 11 3.9 0.002 0.002 72.011 72.011 qs_energies 11 4.9 0.001 0.002 67.118 67.120 scf_env_do_scf 11 5.9 0.001 0.001 60.477 60.478 scf_env_do_scf_inner_loop 108 6.5 0.003 0.010 56.066 56.067 velocity_verlet 10 3.0 0.002 0.002 43.263 43.264 rebuild_ks_matrix 119 8.3 0.001 0.001 28.146 28.193 qs_ks_build_kohn_sham_matrix 119 9.3 0.022 0.023 28.145 28.192 qs_ks_update_qs_env 119 7.6 0.001 0.001 25.050 25.094 sum_up_and_integrate 119 10.3 0.046 0.050 22.082 22.114 integrate_v_rspace 119 11.3 0.005 0.005 22.035 22.071 qs_rho_update_rho 119 7.7 0.001 0.001 21.839 21.854 calculate_rho_elec 119 8.7 0.048 0.049 21.838 21.853 dbcsr_multiply_generic 2286 12.5 0.133 0.137 16.850 16.995 grid_integrate_task_list 119 12.3 15.752 16.443 15.752 16.443 grid_collocate_task_list 119 9.7 15.515 16.332 15.515 16.332 qs_scf_new_mos 108 7.5 0.001 0.001 13.774 13.815 qs_scf_loop_do_ot 108 8.5 0.001 0.001 13.773 13.814 ot_scf_mini 108 9.5 0.003 0.003 12.940 12.976 multiply_cannon 2286 13.5 0.225 0.230 11.238 11.510 multiply_cannon_loop 2286 14.5 0.223 0.241 10.172 10.600 mp_waitall_1 169478 16.3 8.422 8.838 8.422 8.838 ot_mini 108 10.5 0.001 0.001 7.635 7.673 rs_pw_transfer 974 11.9 0.016 0.017 6.631 7.510 density_rs2pw 119 9.7 0.009 0.009 5.734 6.621 multiply_cannon_metrocomm3 18288 15.5 0.082 0.086 5.483 5.957 pw_transfer 1439 11.6 0.145 0.152 5.751 5.825 fft_wrap_pw1pw2 1201 12.6 0.014 0.015 5.450 5.511 potential_pw2rs 119 12.3 0.010 0.010 5.054 5.063 fft_wrap_pw1pw2_140 487 13.2 0.546 0.565 4.735 4.881 init_scf_run 11 5.9 0.000 0.002 4.583 4.583 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.582 4.583 init_scf_loop 11 6.9 0.000 0.001 4.393 4.394 wfi_extrapolate 11 7.9 0.001 0.001 4.186 4.186 fft3d_ps 1201 14.6 2.225 2.337 4.069 4.148 ot_diis_step 108 11.5 0.005 0.006 3.860 3.861 make_m2s 4572 13.5 0.075 0.079 3.798 3.847 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.775 3.842 apply_single 119 13.6 0.001 0.001 3.775 3.842 qs_ot_get_derivative 108 11.5 0.001 0.002 3.742 3.774 multiply_cannon_multrec 18288 15.5 3.489 3.632 3.506 3.650 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.311 3.319 mp_waitany 9880 13.7 2.311 3.187 2.311 3.187 make_images 4572 14.5 0.188 0.193 3.112 3.161 rs_pw_transfer_RS2PW_140 130 11.5 0.518 0.572 2.076 2.964 rs_pw_transfer_PW2RS_140 130 13.9 1.216 1.289 2.542 2.568 mp_alltoall_d11v 2130 13.8 1.513 2.129 1.513 2.129 qs_ot_get_p 119 10.4 0.001 0.001 1.810 1.868 rs_gather_matrices 119 12.3 0.130 0.143 1.175 1.824 cp_gemm 81 9.0 0.000 0.000 1.652 1.657 cp_gemm_cosma 81 10.0 1.652 1.656 1.652 1.656 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.436 1.548 make_images_data 4572 15.5 0.062 0.068 1.444 1.546 prepare_preconditioner 11 7.9 0.000 0.000 1.446 1.459 make_preconditioner 11 8.9 0.000 0.000 1.446 1.459 qs_energies_init_hamiltonians 11 5.9 0.000 0.001 1.445 1.445 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=77.32, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.44, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=20.036, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=17.105, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.208, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.878, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.149, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=25.066999999999993, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.515, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.752, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.652, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.489, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.311, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.422, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.033 220.674 220.674 qs_mol_dyn_low 1 2.0 0.004 0.004 219.745 219.745 qs_forces 11 3.9 0.002 0.002 219.687 219.687 qs_energies 11 4.9 0.001 0.001 205.779 205.779 scf_env_do_scf 11 5.9 0.001 0.001 169.025 169.025 velocity_verlet 10 3.0 0.002 0.002 146.602 146.602 scf_env_do_scf_inner_loop 96 6.5 0.008 0.008 126.354 126.354 rebuild_ks_matrix 107 8.3 0.001 0.001 62.812 62.812 qs_ks_build_kohn_sham_matrix 107 9.3 0.018 0.018 62.811 62.811 qs_ks_update_qs_env 107 7.6 0.001 0.001 56.708 56.708 qs_rho_update_rho 107 7.7 0.001 0.001 55.945 55.945 calculate_rho_elec 107 8.7 1.414 1.414 55.944 55.944 sum_up_and_integrate 107 10.3 0.347 0.347 51.717 51.717 integrate_v_rspace 107 11.3 0.506 0.506 51.369 51.369 grid_collocate_task_list 107 9.7 50.282 50.282 50.282 50.282 grid_integrate_task_list 107 12.3 48.445 48.445 48.445 48.445 init_scf_loop 11 6.9 0.000 0.000 42.463 42.463 prepare_preconditioner 11 7.9 0.000 0.000 35.111 35.111 make_preconditioner 11 8.9 0.000 0.000 35.111 35.111 make_full_inverse_cholesky 11 9.9 0.000 0.000 32.985 32.985 qs_scf_new_mos 96 7.5 0.001 0.001 26.303 26.303 qs_scf_loop_do_ot 96 8.5 0.001 0.001 26.302 26.302 ot_scf_mini 96 9.5 0.003 0.003 24.579 24.579 dbcsr_multiply_generic 1966 12.4 0.180 0.180 21.986 21.986 init_scf_run 11 5.9 0.001 0.001 19.798 19.798 scf_env_initial_rho_setup 11 6.9 0.001 0.001 19.797 19.797 cp_fm_cholesky_invert 11 10.9 19.767 19.767 19.767 19.767 wfi_extrapolate 11 7.9 0.001 0.001 18.660 18.660 cp_gemm 81 9.0 0.000 0.000 15.245 15.245 cp_gemm_cosma 81 10.0 15.244 15.244 15.244 15.244 ot_mini 96 10.5 0.001 0.001 14.446 14.446 make_m2s 3932 13.4 0.058 0.058 12.249 12.249 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.852 11.852 qs_ot_get_derivative 96 11.5 0.001 0.001 7.845 7.845 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.753 7.753 pw_transfer 1295 11.6 0.089 0.089 7.356 7.356 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.146 7.146 fft_wrap_pw1pw2 1081 12.6 0.009 0.009 7.061 7.061 cp_fm_cholesky_decompose 22 10.9 6.888 6.888 6.888 6.888 qs_create_task_list 11 7.9 0.000 0.000 6.601 6.601 generate_qs_task_list 11 8.9 4.923 4.923 6.601 6.601 dbcsr_complete_redistribute 317 12.2 3.090 3.090 6.600 6.600 ot_diis_step 96 11.5 0.005 0.005 6.597 6.597 make_images 3932 14.4 2.386 2.386 6.594 6.594 qs_ot_get_p 107 10.4 0.001 0.001 6.405 6.405 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.151 6.151 fft_wrap_pw1pw2_140 439 13.2 0.695 0.695 6.017 6.017 dbcsr_make_dense_low 4961 15.5 0.101 0.101 5.730 5.730 make_dense_data 4961 16.5 4.865 4.865 5.610 5.610 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.598 5.598 apply_single 107 13.6 0.000 0.000 5.597 5.597 multiply_cannon 1966 13.4 0.894 0.894 5.368 5.368 copy_dbcsr_to_fm 147 11.2 0.004 0.004 5.308 5.308 dbcsr_copy 1855 11.9 0.285 0.285 5.305 5.305 dbcsr_make_images_dense 3386 14.7 0.024 0.024 5.138 5.138 dbcsr_copy_into_existing 22 7.9 4.947 4.947 4.948 4.948 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.940 4.940 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.940 4.940 qs_ot_p2m_diag 44 11.0 0.201 0.201 4.873 4.873 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.012 129.236 129.236 qs_mol_dyn_low 1 2.0 0.005 0.005 129.112 129.117 qs_forces 11 3.9 0.002 0.002 129.055 129.056 qs_energies 11 4.9 0.001 0.001 120.172 120.173 scf_env_do_scf 11 5.9 0.001 0.001 110.316 110.318 scf_env_do_scf_inner_loop 96 6.5 0.003 0.009 102.421 102.422 velocity_verlet 10 3.0 0.002 0.005 77.729 77.731 rebuild_ks_matrix 107 8.3 0.001 0.001 58.856 58.894 qs_ks_build_kohn_sham_matrix 107 9.3 0.020 0.022 58.856 58.894 sum_up_and_integrate 107 10.3 0.043 0.047 53.281 53.315 integrate_v_rspace 107 11.3 0.004 0.005 53.238 53.274 qs_ks_update_qs_env 107 7.6 0.001 0.001 51.788 51.820 qs_rho_update_rho 107 7.7 0.001 0.001 49.331 49.346 calculate_rho_elec 107 8.7 0.043 0.044 49.330 49.345 grid_integrate_task_list 107 12.3 46.509 47.077 46.509 47.077 grid_collocate_task_list 107 9.7 42.684 43.313 42.684 43.313 dbcsr_multiply_generic 1966 12.4 0.118 0.123 15.418 15.571 qs_scf_new_mos 96 7.5 0.001 0.001 12.431 12.479 qs_scf_loop_do_ot 96 8.5 0.001 0.001 12.430 12.478 ot_scf_mini 96 9.5 0.003 0.003 11.660 11.711 multiply_cannon 1966 13.4 0.196 0.202 10.257 10.451 multiply_cannon_loop 1966 14.4 0.198 0.213 9.275 9.630 rs_pw_transfer 878 11.9 0.014 0.016 7.099 8.185 mp_waitall_1 146670 16.2 7.771 8.092 7.771 8.092 init_scf_loop 11 6.9 0.000 0.000 7.875 7.875 init_scf_run 11 5.9 0.000 0.002 7.723 7.724 scf_env_initial_rho_setup 11 6.9 0.000 0.001 7.723 7.723 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.269 7.279 density_rs2pw 107 9.7 0.008 0.008 6.094 7.189 wfi_extrapolate 11 7.9 0.001 0.001 7.109 7.109 ot_mini 96 10.5 0.001 0.001 6.867 6.922 multiply_cannon_metrocomm3 15728 15.4 0.072 0.080 5.023 5.404 pw_transfer 1295 11.6 0.130 0.143 5.262 5.329 fft_wrap_pw1pw2 1081 12.6 0.012 0.013 4.990 5.059 potential_pw2rs 107 12.3 0.009 0.010 4.765 4.775 fft_wrap_pw1pw2_140 439 13.2 0.491 0.512 4.302 4.469 mp_waitany 8968 13.7 3.104 4.177 3.104 4.177 rs_pw_transfer_RS2PW_140 118 11.5 0.410 0.448 2.772 3.867 fft3d_ps 1081 14.6 2.008 2.134 3.734 3.805 make_m2s 3932 13.4 0.066 0.068 3.452 3.498 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.399 3.467 apply_single 107 13.6 0.001 0.001 3.399 3.466 ot_diis_step 96 11.5 0.004 0.005 3.463 3.464 qs_ot_get_derivative 96 11.5 0.001 0.001 3.372 3.424 multiply_cannon_multrec 15728 15.4 3.162 3.334 3.177 3.349 mp_alltoall_d11v 1998 13.7 2.241 3.295 2.241 3.295 rs_gather_matrices 107 12.3 0.120 0.131 1.912 2.955 make_images 3932 14.4 0.166 0.172 2.847 2.894 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=80.048, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=50.282, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.445, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=19.767, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.244, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.888, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=26.005999999999986, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=42.684, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.509, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.771, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.104, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.162, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.385 0.385 282.701 282.701 qs_energies 1 2.0 0.000 0.000 281.429 281.429 scf_env_do_scf 1 3.0 0.000 0.000 279.008 279.008 qs_ks_update_qs_env 8 5.0 0.000 0.000 261.399 261.399 rebuild_ks_matrix 7 6.0 0.000 0.000 261.291 261.291 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 261.291 261.291 hfx_ks_matrix 7 8.0 0.000 0.000 174.819 174.819 integrate_four_center 7 9.0 2.198 2.198 174.788 174.788 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 163.159 163.159 integrate_four_center_main 7 10.0 0.818 0.818 161.464 161.464 integrate_four_center_bin 455 11.0 160.647 160.647 160.647 160.647 init_scf_loop 1 4.0 0.000 0.000 115.834 115.834 cp_gemm 129 10.3 0.001 0.001 71.256 71.256 cp_gemm_cosma 129 11.3 71.255 71.255 71.255 71.255 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 41.048 41.048 admm_fit_mo_coeffs 7 9.0 0.000 0.000 39.305 39.305 admm_mo_merge_derivs 7 8.0 0.000 0.000 36.481 36.481 merge_mo_derivs_diag 7 9.0 0.022 0.022 36.481 36.481 purify_mo_diag 7 10.0 0.001 0.001 23.428 23.428 fit_mo_coeffs 7 10.0 0.000 0.000 15.877 15.877 prepare_preconditioner 1 5.0 0.000 0.000 13.707 13.707 make_preconditioner 1 6.0 0.000 0.000 13.707 13.707 integrate_four_center_load 7 10.0 0.001 0.001 10.737 10.737 hfx_load_balance 1 11.0 0.002 0.002 10.736 10.736 arnoldi_normal_ev 11 9.3 0.002 0.002 8.198 8.198 estimate_cond_num 1 7.0 0.000 0.000 8.124 8.124 build_subspace 28 9.5 0.015 0.015 8.093 8.093 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.204 0.210 189.873 189.874 qs_energies 1 2.0 0.000 0.000 189.523 189.524 scf_env_do_scf 1 3.0 0.000 0.000 188.951 188.951 qs_ks_update_qs_env 8 5.0 0.000 0.000 185.988 185.988 rebuild_ks_matrix 7 6.0 0.000 0.000 185.975 185.975 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 185.975 185.975 hfx_ks_matrix 7 8.0 0.000 0.001 173.833 173.834 integrate_four_center 7 9.0 0.098 0.407 173.817 173.818 integrate_four_center_main 7 10.0 0.005 0.005 158.706 161.790 integrate_four_center_bin 448 11.0 158.702 161.785 158.702 161.785 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 110.112 110.112 init_scf_loop 1 4.0 0.000 0.000 78.837 78.838 integrate_four_center_load 7 10.0 0.000 0.000 10.642 10.644 hfx_load_balance 1 11.0 0.001 0.002 10.642 10.644 mp_sync 70 11.3 3.639 6.596 3.639 6.596 hfx_load_balance_count 1 12.0 5.247 5.354 5.247 5.354 hfx_load_balance_bin 1 12.0 5.245 5.321 5.245 5.321 cp_gemm 129 10.3 0.000 0.001 4.930 4.936 cp_gemm_cosma 129 11.3 4.930 4.936 4.930 4.936 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=47.398000000000025, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=160.647, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=71.255, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.198, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.818, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.385, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=11.80299999999994, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=158.702, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=4.93, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.098, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.204, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.247, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=3.639, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.245, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 379.237 379.237 qs_energies 1 2.0 0.000 0.000 378.686 378.686 mp2_main 1 3.0 0.000 0.000 372.247 372.247 mp2_gpw_main 1 4.0 0.000 0.000 371.757 371.757 rpa_ri_compute_en 1 5.0 0.000 0.000 358.588 358.588 rpa_num_int 1 6.0 0.001 0.001 358.566 358.566 compute_mat_P_omega 1 7.0 0.002 0.002 171.512 171.512 compute_mat_P_omega_contract 10 8.0 11.811 11.811 169.929 169.929 dbt_total 2336 9.6 0.019 0.019 163.456 163.456 cp_gemm 105 8.4 0.000 0.000 157.945 157.945 cp_gemm_cosma 105 9.4 157.944 157.944 157.944 157.944 GW_matrix_operations 10 7.0 0.005 0.005 111.423 111.423 dbt_contract 787 11.0 47.353 47.353 100.158 100.158 dbt_copy 1103 10.7 20.625 20.625 62.009 62.009 compute_mat_P_omega_calc_M_occ 250 9.0 11.806 11.806 61.544 61.544 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 53.381 53.381 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 51.693 51.693 dbt_tas_total 1149 12.2 0.262 0.262 49.639 49.639 dbt_tas_multiply 807 12.1 0.004 0.004 48.378 48.378 dbt_tas_dbm 807 14.1 0.005 0.005 38.556 38.556 dbm_multiply 807 16.1 38.544 38.544 38.544 38.544 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 37.833 37.833 dbt_tas_mm_1N 524 15.1 0.002 0.002 23.553 23.553 compute_QP_energies 1 7.0 0.000 0.000 20.329 20.329 compute_self_energy_cubic_gw 1 8.0 0.098 0.098 20.329 20.329 dbt_tas_copy 574 11.4 16.300 16.300 19.682 19.682 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 17.818 17.818 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 14.366 14.366 dbt_tas_mm_2 251 15.0 0.002 0.002 13.271 13.271 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 13.153 13.153 dbt_tas_reserve_blocks_index 3261 13.7 7.726 7.726 13.031 13.031 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 12.723 12.723 dbt_copy_nocomm 251 12.0 11.013 11.013 12.614 12.614 dbt_reserve_blocks_index 2280 12.5 1.566 1.566 10.857 10.857 dbt_reserve_blocks_index_array 2222 11.6 0.011 0.011 10.819 10.819 cp_fm_cholesky_invert 10 8.0 9.337 9.337 9.337 9.337 contract_cubic_gw 21 9.0 0.000 0.000 8.651 8.651 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.009 53.671 53.672 qs_energies 1 2.0 0.001 0.001 53.556 53.557 mp2_main 1 3.0 0.000 0.000 52.090 52.091 mp2_gpw_main 1 4.0 0.000 0.001 52.033 52.034 rpa_ri_compute_en 1 5.0 0.000 0.000 50.150 50.151 rpa_num_int 1 6.0 0.001 0.001 50.142 50.143 dbt_total 2336 9.6 0.019 0.021 37.893 37.894 compute_mat_P_omega 1 7.0 0.001 0.002 36.686 36.694 compute_mat_P_omega_contract 10 8.0 0.700 0.727 36.361 36.366 dbt_contract 787 11.0 1.813 1.984 27.577 27.582 dbt_tas_total 1149 12.2 0.075 0.079 24.027 24.028 dbt_tas_multiply 807 12.1 0.003 0.004 23.939 23.941 dbt_tas_dbm 807 14.1 0.005 0.006 16.568 16.569 dbm_multiply 807 16.1 13.222 14.072 13.222 14.072 compute_mat_P_omega_calc_M_occ 250 9.0 0.692 0.717 12.092 12.092 dbt_copy 1111 10.7 3.900 4.114 8.762 9.033 cp_gemm 105 8.4 0.000 0.000 9.004 9.017 cp_gemm_cosma 105 9.4 9.003 9.017 9.003 9.017 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.981 8.981 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 7.544 7.544 dbt_tas_mm_1N 524 15.1 0.002 0.003 6.646 7.111 mp_sync 8706 11.6 6.048 7.076 6.048 7.076 dbt_tas_mm_2 251 15.0 0.002 0.002 7.030 7.030 GW_matrix_operations 10 7.0 0.001 0.002 5.773 5.781 compute_QP_energies 1 7.0 0.000 0.000 4.138 4.138 compute_self_energy_cubic_gw 1 8.0 0.005 0.006 4.136 4.138 dbt_communicate_buffer 1098 11.7 0.093 0.100 3.499 3.627 mp_waitall_2 3776 14.7 3.340 3.591 3.340 3.591 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.173 3.174 contract_cubic_gw 21 9.0 0.000 0.000 3.133 3.133 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 3.057 3.059 dbt_reserve_blocks_index 2849 12.4 0.101 0.108 2.301 2.541 dbt_reserve_blocks_index_array 2791 11.4 0.011 0.013 2.299 2.538 dbt_tas_reserve_blocks_index 3300 13.8 0.264 0.282 2.258 2.493 dbm_reserve_blocks 3696 14.8 2.063 2.279 2.063 2.279 dbt_tas_replicate 396 14.1 1.182 1.420 2.221 2.279 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 1.879 1.881 convert_to_new_pgrid 2421 14.1 0.034 0.038 1.568 1.676 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.002 1.646 1.650 dbm_copy 1608 15.1 1.524 1.633 1.524 1.633 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.468 1.472 scf_env_do_scf 1 3.0 0.000 0.000 1.412 1.412 scf_env_do_scf_inner_loop 17 4.0 0.001 0.001 1.412 1.412 mp_max_i 1991 9.8 0.972 1.250 0.972 1.250 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=98.471, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=157.944, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_contract", label="dbt_contract", y=47.353, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=38.544, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_copy", label="dbt_copy", y=20.625, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=16.3, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=16.345000000000006, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=9.003, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_contract", label="dbt_contract", y=1.813, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=13.222, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_copy", label="dbt_copy", y=3.9, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.048, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.34, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.020 703.390 703.390 qs_forces 1 2.0 0.000 0.000 702.555 702.555 rebuild_ks_matrix 7 6.6 0.000 0.000 693.027 693.027 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 693.027 693.027 hfx_ks_matrix 7 8.6 0.000 0.000 690.134 690.134 dbt_total 1051 10.5 0.010 0.010 565.228 565.228 hfx_ri_update_ks 7 9.6 0.000 0.000 376.194 376.194 hfx_ri_update_ks_Pmat 7 10.6 63.142 63.142 376.187 376.187 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 362.075 362.075 dbt_contract 283 11.7 117.662 117.662 356.866 356.866 qs_energies 1 3.0 0.000 0.000 340.417 340.417 scf_env_do_scf 1 4.0 0.000 0.000 340.011 340.011 qs_ks_update_qs_env 8 6.0 0.000 0.000 330.958 330.958 hfx_ri_update_forces 1 7.0 0.040 0.040 313.934 313.934 dbt_tas_total 533 12.7 3.019 3.019 227.112 227.112 dbt_tas_multiply 292 12.8 0.001 0.001 222.528 222.528 dbt_copy 444 11.5 101.099 101.099 203.764 203.764 dbt_tas_dbm 292 14.8 0.002 0.002 199.273 199.273 dbm_multiply 292 16.8 199.267 199.267 199.267 199.267 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 198.094 198.094 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 164.716 164.716 init_scf_loop 2 5.0 0.000 0.000 141.914 141.914 dbt_tas_mm_2 119 16.0 0.001 0.001 110.639 110.639 hfx_ri_forces_Pmat_2c_inv_2 9 8.0 0.000 0.000 57.792 57.792 dbt_tas_mm_3N 94 14.7 0.000 0.000 56.455 56.455 hfx_ri_forces_Pmat_metric 9 8.0 0.001 0.001 55.293 55.293 precalc_derivatives 1 8.0 0.008 0.008 54.060 54.060 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 44.926 44.926 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 43.808 43.808 dbt_tas_reserve_blocks_index 1569 14.6 16.807 16.807 37.270 37.270 dbt_tas_copy 287 12.3 27.888 27.888 35.249 35.249 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 34.150 34.150 dbt_reserve_blocks_index 1020 13.6 3.450 3.450 32.915 32.915 dbt_reserve_blocks_index_array 999 12.6 0.008 0.008 32.611 32.611 dbt_tas_mm_3T 77 17.1 0.000 0.000 32.023 32.023 hfx_ri_forces_Pmat_PQ_der 9 8.0 1.418 1.418 30.437 30.437 hfx_ri_forces_Pmat_Pmat_2 9 8.0 0.000 0.000 28.582 28.582 hfx_ri_forces_Pmat_2c_inv_1 1 8.0 4.398 4.398 27.648 27.648 build_3c_derivatives 2 9.0 1.206 1.206 26.886 26.886 get_force_from_3c_trace 18 8.0 26.268 26.268 26.268 26.268 dbt_split_blocks_generic 138 11.7 16.933 16.933 20.538 20.538 dbt_communicate_buffer 157 12.8 20.183 20.183 20.183 20.183 dbt_split_copyback 69 11.7 16.369 16.369 18.617 18.617 dbm_reserve_blocks 1813 15.4 16.757 16.757 16.757 16.757 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.011 104.532 104.533 qs_forces 1 2.0 0.000 0.000 104.363 104.363 rebuild_ks_matrix 7 6.6 0.000 0.000 103.254 103.255 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.003 103.254 103.255 hfx_ks_matrix 7 8.6 0.000 0.000 101.561 101.561 dbt_total 1051 10.5 0.010 0.012 93.123 93.124 dbt_contract 283 11.7 4.908 5.233 73.324 73.344 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 68.581 68.581 dbt_tas_total 533 12.7 0.117 0.217 67.229 67.229 dbt_tas_multiply 292 12.8 0.001 0.002 64.279 64.280 hfx_ri_update_forces 1 7.0 0.003 0.003 63.826 63.826 dbt_tas_dbm 292 14.8 0.002 0.003 49.649 49.649 dbm_multiply 292 16.8 44.266 45.871 44.266 45.871 hfx_ri_update_ks 7 9.6 0.000 0.000 37.734 37.734 hfx_ri_update_ks_Pmat 7 10.6 2.509 2.698 37.732 37.732 qs_energies 1 3.0 0.000 0.001 35.766 35.766 scf_env_do_scf 1 4.0 0.000 0.000 35.554 35.554 qs_ks_update_qs_env 8 6.0 0.000 0.000 34.674 34.675 dbt_tas_mm_2 119 16.0 0.001 0.001 21.423 21.423 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 20.090 20.090 hfx_ri_forces_Pmat_metric 9 8.0 0.001 0.001 18.871 18.871 dbt_copy 464 11.6 5.699 5.876 15.787 16.225 init_scf_loop 2 5.0 0.000 0.000 15.464 15.464 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 14.367 14.367 dbt_tas_mm_3T 77 17.1 0.001 0.001 12.655 13.376 dbt_tas_mm_3N 94 14.7 0.001 0.001 12.384 12.989 mp_sync 3797 12.3 9.360 12.760 9.360 12.760 hfx_ri_forces_Pmat_2c_inv_2 9 8.0 0.000 0.000 11.703 11.703 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 9.974 9.974 hfx_ri_forces_Pmat_PQ_der 9 8.0 0.068 0.071 9.319 9.319 hfx_ri_forces_Pmat_Pmat_2 9 8.0 0.000 0.000 7.868 7.888 dbt_tas_reserve_blocks_index 1602 14.7 0.576 0.608 6.392 6.779 dbm_reserve_blocks 1848 15.6 6.194 6.703 6.194 6.703 mp_waitall_2 1484 15.5 6.122 6.618 6.122 6.618 precalc_derivatives 1 8.0 0.003 0.003 5.823 5.824 dbt_reserve_blocks_index 1191 13.5 0.158 0.166 5.449 5.765 dbt_reserve_blocks_index_array 1170 12.5 0.008 0.009 5.411 5.727 dbt_tas_replicate 246 14.5 1.625 2.176 5.218 5.552 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 4.676 4.676 hfx_ri_forces_Pmat_2c_inv_1 1 8.0 0.159 0.167 4.591 4.596 convert_to_new_pgrid 876 14.8 0.033 0.043 3.720 4.171 dbm_copy 578 15.8 3.652 4.110 3.652 4.110 dbt_tas_communicate_buffer 498 15.8 0.021 0.024 3.324 3.714 dbt_communicate_buffer 328 12.3 0.022 0.023 3.162 3.281 build_3c_derivatives 2 9.0 0.615 0.677 3.251 3.261 dbt_tas_replicate_communicate_ 127 15.0 0.003 0.005 2.646 3.020 hfx_ri_forces_Pmat_Pmat_1 3 8.0 0.000 0.000 2.596 2.596 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 2.514 2.524 dbt_tas_copy 141 13.3 1.337 1.439 2.357 2.515 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 2.203 2.203 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=177.57500000000005, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=199.267, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_contract", label="dbt_contract", y=117.662, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_copy", label="dbt_copy", y=101.099, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=63.142, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=27.888, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=16.757, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=24.137, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=44.266, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_contract", label="dbt_contract", y=4.908, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_copy", label="dbt_copy", y=5.699, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=2.509, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=1.337, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=6.194, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=9.36, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=6.122, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 263.422 263.422 qs_energies 1 2.0 0.000 0.000 263.175 263.175 mp2_main 1 3.0 0.000 0.000 249.575 249.575 mp2_gpw_main 1 4.0 0.002 0.002 248.426 248.426 mp2_ri_gpw_compute_in 1 5.0 0.851 0.851 179.811 179.811 mp2_ri_gpw_compute_in_loop 1 6.0 0.024 0.024 148.884 148.884 mp2_eri_3c_integrate_gpw 2656 7.0 0.030 0.030 107.588 107.588 integrate_v_rspace 2666 8.0 4.749 4.749 87.593 87.593 grid_integrate_task_list 2666 9.0 79.756 79.756 79.756 79.756 mp2_ri_gpw_compute_en 1 5.0 0.053 0.053 68.560 68.560 mp2_ri_gpw_compute_en_RI_loop 1 6.0 33.619 33.619 64.953 64.953 calculate_wavefunction 5312 9.0 28.093 28.093 36.851 36.851 get_2c_integrals 1 6.0 0.000 0.000 30.018 30.018 compute_2c_integrals 1 7.0 0.012 0.012 28.078 28.078 compute_2c_integrals_loop_lm 1 8.0 0.026 0.026 28.037 28.037 mp2_eri_2c_integrate_gpw 1 9.0 4.758 4.758 28.011 28.011 dbcsr_multiply_generic 5322 8.0 0.279 0.279 27.462 27.462 ao_to_mo_and_store_B_mult_1 2656 7.0 0.019 0.019 27.427 27.427 mp2_ri_gpw_compute_en_expansio 2080 7.0 20.304 20.304 20.304 20.304 multiply_cannon 5322 9.0 4.338 4.338 17.121 17.121 pw_transfer 63872 10.6 2.040 2.040 15.650 15.650 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 9.261 9.261 13.676 13.676 fft_wrap_pw1pw2 53228 11.4 0.224 0.224 13.302 13.302 scf_env_do_scf 1 3.0 0.000 0.000 12.908 12.908 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 12.908 12.908 multiply_cannon_loop 5322 10.0 0.150 0.150 11.298 11.298 mp2_ri_gpw_compute_en_ener 2080 7.0 11.017 11.017 11.017 11.017 qs_scf_new_mos 10 5.0 0.000 0.000 10.922 10.922 multiply_cannon_multrec 5322 11.0 9.990 9.990 10.044 10.044 fft_wrap_pw1pw2_20 21271 12.4 0.515 0.515 8.927 8.927 fft3d_s 53229 13.4 8.784 8.784 8.852 8.852 eigensolver 11 5.8 0.001 0.001 7.643 7.643 make_m2s 10644 9.0 0.085 0.085 7.507 7.507 cp_fm_diag_elpa 11 6.8 0.000 0.000 7.023 7.023 cp_fm_diag_elpa_base 11 7.8 6.806 6.806 7.023 7.023 make_images 10644 10.0 2.576 2.576 6.975 6.975 potential_pw2rs 5322 10.0 0.256 0.256 5.985 5.985 copy_dbcsr_to_fm 2679 8.0 0.047 0.047 5.376 5.376 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.010 64.306 64.307 qs_energies 1 2.0 0.001 0.001 64.201 64.201 mp2_main 1 3.0 0.000 0.000 61.011 61.011 mp2_gpw_main 1 4.0 0.001 0.001 60.854 60.854 mp2_ri_gpw_compute_in 1 5.0 0.052 0.056 34.276 35.646 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 31.588 32.942 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 28.415 29.753 integrate_v_rspace 93 8.1 0.190 0.207 28.126 29.474 grid_integrate_task_list 93 9.1 27.556 28.917 27.556 28.917 mp2_ri_gpw_compute_en 1 5.0 0.274 0.279 26.481 26.865 mp2_ri_gpw_compute_en_RI_loop 1 6.0 2.481 2.835 22.166 22.616 mp2_ri_gpw_compute_en_expansio 65 7.0 9.572 9.897 9.572 9.897 mp2_ri_gpw_compute_en_comm 65 7.0 5.066 5.409 9.170 9.861 mp_sendrecv_dm3 390 8.0 4.104 5.043 4.104 5.043 scf_env_do_scf 1 3.0 0.000 0.000 3.004 3.005 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 3.004 3.005 dbcsr_multiply_generic 176 8.0 0.012 0.013 2.589 2.768 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.558 2.735 get_2c_integrals 1 6.0 0.000 0.000 2.517 2.549 mp2_ri_create_group 1 6.0 0.000 0.000 2.213 2.231 replicate_iaK_2intgroup 1 7.0 1.582 1.719 2.103 2.231 compute_2c_integrals 1 7.0 0.006 0.008 2.149 2.163 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 1.981 2.034 mp2_eri_2c_integrate_gpw 1 9.0 0.434 0.464 1.980 2.032 mp2_ri_get_sizes 1 6.0 0.000 0.000 1.358 1.743 mp_min_d 1 7.0 1.358 1.743 1.358 1.743 calculate_wavefunction 166 9.0 0.889 0.920 1.635 1.675 pw_transfer 2120 10.5 0.081 0.087 1.520 1.546 make_m2s 352 9.0 0.004 0.005 1.332 1.465 make_images 352 10.0 0.066 0.067 1.319 1.451 fft_wrap_pw1pw2 1768 11.4 0.009 0.009 1.410 1.433 qs_scf_new_mos 10 5.0 0.000 0.000 1.391 1.402 eigensolver 11 5.8 0.001 0.001 1.387 1.388 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=90.63300000000004, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=79.756, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=33.619, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=28.093, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_expansio", label="mp2_ri_gpw_compute_en_expansio", y=20.304, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_ener", label="mp2_ri_gpw_compute_en_ener", y=11.017, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_comm", label="mp2_ri_gpw_compute_en_comm", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=14.637999999999998, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.556, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=2.481, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.889, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_expansio", label="mp2_ri_gpw_compute_en_expansio", y=9.572, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_ener", label="mp2_ri_gpw_compute_en_ener", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_comm", label="mp2_ri_gpw_compute_en_comm", y=5.066, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=4.104, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.101 0.101 193.382 193.382 qs_energies 1 2.0 0.000 0.000 191.580 191.580 scf_env_do_scf 1 3.0 0.000 0.000 181.023 181.023 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 181.023 181.023 qs_scf_new_mos 15 5.0 0.000 0.000 80.118 80.118 qs_ks_update_qs_env 15 5.0 0.000 0.000 69.329 69.329 rebuild_ks_matrix 15 6.0 0.000 0.000 68.950 68.950 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 68.949 68.949 eigensolver 15 6.0 0.002 0.002 66.668 66.668 cp_fm_diag_elpa 15 7.0 0.000 0.000 52.419 52.419 cp_fm_diag_elpa_base 15 8.0 47.624 47.624 52.419 52.419 qs_vxc_create 15 8.0 0.000 0.000 44.705 44.705 calculate_dispersion_nonloc 15 9.0 8.920 8.920 38.904 38.904 pw_transfer 1191 9.8 0.091 0.091 26.636 26.636 fft_wrap_pw1pw2 1086 10.9 0.012 0.012 26.343 26.343 qs_rho_update_rho 16 5.0 0.000 0.000 25.370 25.370 calculate_rho_elec 16 6.0 0.343 0.343 25.370 25.370 grid_collocate_task_list 16 7.0 23.807 23.807 23.807 23.807 sum_up_and_integrate 15 8.0 0.076 0.076 22.600 22.600 integrate_v_rspace 15 9.0 0.033 0.033 22.524 22.524 grid_integrate_task_list 15 10.0 21.901 21.901 21.901 21.901 fft_wrap_pw1pw2_150 765 12.0 3.324 3.324 19.968 19.968 fft3d_s 1087 12.8 10.932 10.932 10.943 10.943 copy_dbcsr_to_fm 16 5.9 0.001 0.001 10.852 10.852 pw_scatter_s 585 13.0 10.423 10.423 10.423 10.423 cp_fm_cholesky_restore 45 7.0 9.568 9.568 9.568 9.568 cp_fm_upper_to_full 30 8.0 9.474 9.474 9.474 9.474 dbcsr_complete_redistribute 46 8.3 3.436 3.436 9.405 9.405 vdW_energy 15 10.0 7.660 7.660 7.660 7.660 gspace_mixing 14 5.0 0.273 0.273 7.587 7.587 broyden_mixing 14 6.0 6.837 6.837 6.837 6.837 fft_wrap_pw1pw2_200 197 11.5 0.344 0.344 6.120 6.120 xc_vxc_pw_create 15 9.0 1.542 1.542 5.800 5.800 init_scf_run 1 3.0 0.000 0.000 5.025 5.025 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.716 4.716 dbcsr_finalize 159 9.9 0.020 0.020 3.997 3.997 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.016 86.565 86.566 qs_energies 1 2.0 0.000 0.001 86.177 86.177 scf_env_do_scf 1 3.0 0.000 0.000 81.038 81.038 scf_env_do_scf_inner_loop 15 4.0 0.001 0.002 81.038 81.038 qs_ks_update_qs_env 15 5.0 0.000 0.000 39.692 39.726 rebuild_ks_matrix 15 6.0 0.000 0.000 39.643 39.678 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.005 39.643 39.677 qs_rho_update_rho 16 5.0 0.000 0.000 24.112 24.113 calculate_rho_elec 16 6.0 0.011 0.012 24.112 24.113 sum_up_and_integrate 15 8.0 0.013 0.015 23.414 23.468 integrate_v_rspace 15 9.0 0.001 0.001 23.401 23.454 grid_collocate_task_list 16 7.0 22.195 22.515 22.195 22.515 grid_integrate_task_list 15 10.0 21.524 22.011 21.524 22.011 qs_scf_new_mos 15 5.0 0.001 0.001 17.718 17.866 eigensolver 15 6.0 0.002 0.002 16.229 16.244 qs_vxc_create 15 8.0 0.001 0.001 15.696 15.707 calculate_dispersion_nonloc 15 9.0 1.400 1.428 12.778 12.792 pw_transfer 1191 9.8 0.137 0.151 11.974 12.109 cp_fm_diag_elpa 15 7.0 0.000 0.000 11.858 11.864 cp_fm_diag_elpa_base 15 8.0 11.605 11.655 11.853 11.856 fft_wrap_pw1pw2 1086 10.9 0.020 0.024 11.673 11.824 fft3d_ps 1086 12.9 5.093 5.211 8.834 9.139 fft_wrap_pw1pw2_150 765 12.0 0.678 0.705 7.830 7.869 cp_fm_cholesky_restore 45 7.0 4.135 4.191 4.135 4.191 fft_wrap_pw1pw2_200 197 11.5 0.366 0.396 3.689 3.789 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.172 3.172 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.738 2.995 xc_vxc_pw_create 15 9.0 0.056 0.072 2.917 2.931 mp_alltoall_z22v 1086 14.9 2.239 2.773 2.239 2.773 rs_pw_transfer 158 9.4 0.002 0.003 1.794 2.297 vdW_energy 15 10.0 2.087 2.202 2.087 2.202 x_to_yz 585 14.0 0.933 0.976 2.106 2.184 density_rs2pw 16 7.0 0.002 0.002 1.748 2.082 build_core_ppnl 1 5.0 1.823 2.018 1.823 2.018 yz_to_x 501 13.7 0.536 0.600 1.602 1.920 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=69.12700000000001, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=47.624, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.807, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.901, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.932, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.423, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=9.568, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=22.013000000000005, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=11.605, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.195, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.524, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.135, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=5.093, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.085 0.085 315.397 315.397 qs_energies 1 2.0 0.000 0.000 315.234 315.234 ls_scf 1 3.0 0.000 0.000 313.441 313.441 ls_scf_main 1 4.0 0.002 0.002 300.030 300.030 density_matrix_trs4 11 5.0 0.011 0.011 179.384 179.384 dbcsr_multiply_generic 185 6.1 0.622 0.622 113.943 113.943 ls_scf_dm_to_ks 11 5.0 0.000 0.000 113.548 113.548 matrix_ls_to_qs 11 6.0 0.000 0.000 109.082 109.082 multiply_cannon 185 7.1 3.540 3.540 78.951 78.951 dbcsr_complete_redistribute 23 7.5 42.523 42.523 58.096 58.096 multiply_cannon_loop 185 8.1 0.391 0.391 57.086 57.086 dbcsr_copy_into_existing 11 7.0 56.103 56.103 56.103 56.103 multiply_cannon_multrec 185 9.1 54.861 54.861 54.916 54.916 matrix_decluster 11 7.0 0.000 0.000 52.978 52.978 arnoldi_extremal 12 6.1 0.000 0.000 46.393 46.393 arnoldi_normal_ev 12 7.1 0.028 0.028 46.392 46.392 build_subspace 23 8.1 0.134 0.134 45.718 45.718 dbcsr_matrix_vector_mult 652 9.0 0.281 0.281 34.666 34.666 dbcsr_matrix_vector_mult_local 652 10.0 33.069 33.069 33.078 33.078 make_m2s 370 7.1 0.031 0.031 28.646 28.646 make_images 370 8.1 7.453 7.453 26.243 26.243 dbcsr_finalize 646 7.5 0.210 0.210 21.095 21.095 dbcsr_merge_all 597 8.5 3.566 3.566 19.070 19.070 setup_rec_index_2d 370 8.1 18.175 18.175 18.175 18.175 dbcsr_sort_indices 1103 9.9 14.644 14.644 14.644 14.644 tree_to_linear_d 110 9.4 13.252 13.252 13.252 13.252 quick_finalize 395 10.0 0.473 0.473 12.486 12.486 ls_scf_init_scf 1 4.0 0.000 0.000 12.356 12.356 ls_scf_init_matrix_S 1 5.0 0.000 0.000 11.852 11.852 dbcsr_special_finalize 370 9.1 0.003 0.003 11.505 11.505 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 10.945 10.945 dbcsr_dot_sd 144 6.3 9.079 9.079 9.080 9.080 dbcsr_frobenius_norm 142 6.1 7.739 7.739 7.741 7.741 matrix_qs_to_ls 12 5.1 0.000 0.000 7.386 7.386 matrix_cluster 12 6.1 0.000 0.000 7.386 7.386 make_images_data 370 9.1 0.010 0.010 7.093 7.093 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.009 94.285 94.286 qs_energies 1 2.0 0.000 0.000 94.189 94.189 ls_scf 1 3.0 0.000 0.000 94.109 94.110 ls_scf_main 1 4.0 0.001 0.003 90.382 90.383 density_matrix_trs4 11 5.0 0.009 0.013 86.539 86.638 dbcsr_multiply_generic 185 6.1 0.077 0.092 81.230 81.456 multiply_cannon 185 7.1 0.042 0.046 67.734 68.577 multiply_cannon_loop 185 8.1 0.215 0.230 63.903 64.979 multiply_cannon_multrec 1480 9.1 41.582 43.636 42.058 44.103 mp_waitall_1 11936 10.3 19.897 22.791 19.897 22.791 multiply_cannon_metrocomm3 1480 9.1 0.019 0.021 11.727 15.851 make_m2s 370 7.1 0.034 0.038 9.157 9.249 make_images 370 8.1 0.703 0.731 9.038 9.127 multiply_cannon_metrocomm1 1480 9.1 0.011 0.013 4.813 7.342 calculate_norms 2960 9.1 5.012 5.212 5.012 5.212 make_images_data 370 9.1 0.012 0.013 3.752 4.167 mp_sum_l 1039 5.9 3.048 3.958 3.048 3.958 arnoldi_extremal 12 6.1 0.000 0.001 3.882 3.898 arnoldi_normal_ev 12 7.1 0.002 0.008 3.882 3.897 build_subspace 23 8.1 0.039 0.054 3.753 3.756 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.323 3.390 hybrid_alltoall_any 393 9.9 0.321 1.612 3.063 3.343 dbcsr_matrix_vector_mult 652 9.0 0.019 0.081 3.123 3.209 dbcsr_complete_redistribute 23 7.5 1.875 2.006 2.926 3.053 matrix_ls_to_qs 11 6.0 0.000 0.000 2.883 3.012 ls_scf_init_scf 1 4.0 0.000 0.000 2.868 2.869 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 2.116 2.858 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.827 2.839 matrix_decluster 11 7.0 0.000 0.000 2.624 2.749 make_images_pack 370 9.1 2.427 2.613 2.432 2.619 dbcsr_matrix_vector_mult_local 652 10.0 2.461 2.609 2.465 2.615 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.587 2.589 buffer_matrices_ensure_size 370 8.1 2.229 2.334 2.229 2.334 dbcsr_add_d 280 6.0 0.002 0.002 2.061 2.125 dbcsr_add_anytype 280 7.0 1.120 1.180 2.060 2.123 dbcsr_finalize 646 7.5 0.014 0.015 1.898 1.974 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=110.666, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=56.103, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=54.861, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=42.523, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=33.069, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=18.175, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=20.409999999999997, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=41.582, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.875, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.461, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.012, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.048, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=19.897, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.005 95.767 95.767 lib_test 1 2.0 0.000 0.000 95.761 95.761 dbcsr_run_tests 3 3.0 0.002 0.002 95.761 95.761 test_multiplies_multiproc 3 4.0 0.001 0.001 76.219 76.219 dbcsr_redistribute 9 5.0 48.020 48.020 51.682 51.682 dbcsr_multiply_generic 9 5.0 0.001 0.001 22.669 22.669 dbcsr_make_random_matrix 9 4.0 14.191 14.191 19.455 19.455 multiply_cannon 9 6.0 0.002 0.002 16.122 16.122 multiply_cannon_loop 9 7.0 0.003 0.003 15.612 15.612 multiply_cannon_multrec 9 8.0 15.607 15.607 15.608 15.608 dbcsr_finalize 27 5.7 0.004 0.004 9.123 9.123 dbcsr_merge_all 18 6.5 3.297 3.297 8.391 8.391 mp_alltoall_d11v 27 6.0 3.334 3.334 3.334 3.334 tree_to_linear_d 9 7.0 3.179 3.179 3.179 3.179 dbcsr_data_release 975 7.6 2.433 2.433 2.433 2.433 make_m2s 18 6.0 0.001 0.001 2.110 2.110 make_images 18 7.0 0.658 0.658 2.039 2.039 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 26.092 26.093 lib_test 1 2.0 0.000 0.002 26.060 26.082 dbcsr_run_tests 3 3.0 0.001 0.001 26.058 26.081 test_multiplies_multiproc 3 4.0 0.001 0.001 24.916 24.976 dbcsr_multiply_generic 9 5.0 0.002 0.002 23.006 23.111 multiply_cannon 9 6.0 0.002 0.003 20.698 21.073 multiply_cannon_loop 9 7.0 0.004 0.004 20.265 20.641 multiply_cannon_multrec 72 8.0 16.983 17.439 16.984 17.441 mp_waitall_1 576 9.2 3.692 4.216 3.692 4.216 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 2.907 3.499 dbcsr_make_random_matrix 9 4.0 0.886 0.910 1.099 1.152 mp_sum_l 310 2.7 0.560 1.120 0.560 1.120 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.555 1.116 make_m2s 18 6.0 0.001 0.001 0.956 0.993 make_images 18 7.0 0.026 0.028 0.953 0.990 dbcsr_finalize 27 5.7 0.001 0.001 0.845 0.926 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.361 0.893 dbcsr_merge_all 18 6.5 0.139 0.171 0.749 0.856 dbcsr_data_release 444 7.6 0.653 0.756 0.653 0.756 dbcsr_redistribute 9 5.0 0.384 0.438 0.671 0.702 dbcsr_destroy 111 5.9 0.006 0.059 0.565 0.659 make_images_data 18 8.0 0.001 0.001 0.473 0.556 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=8.884999999999991, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=48.02, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=15.607, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=14.191, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.334, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.297, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.433, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.7950000000000017, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.384, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=16.983, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.886, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.139, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.653, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.56, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.692, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.049 0.049 147.393 147.393 qs_mol_dyn_low 1 2.0 0.004 0.004 145.402 145.402 velocity_verlet 5 3.0 0.004 0.004 118.901 118.901 qmmm_el_coupling 6 3.8 0.000 0.000 68.583 68.583 qmmm_elec_with_gaussian 6 4.8 0.184 0.184 68.577 68.577 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 67.609 67.609 qmmm_elec_gaussian_low_G 6 6.8 66.053 66.053 66.053 66.053 qs_forces 6 3.8 0.001 0.001 57.034 57.034 qs_energies 6 4.8 0.000 0.000 50.834 50.834 scf_env_do_scf 6 5.8 0.001 0.001 46.905 46.905 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 39.390 39.390 rebuild_ks_matrix 45 8.4 0.000 0.000 38.600 38.600 qs_ks_build_kohn_sham_matrix 45 9.4 0.007 0.007 38.600 38.600 qs_ks_update_qs_env 45 7.8 0.000 0.000 33.156 33.156 pw_transfer 966 11.9 0.069 0.069 23.285 23.285 fft_wrap_pw1pw2 801 13.0 0.009 0.009 22.949 22.949 fft_wrap_pw1pw2_150 507 14.3 2.377 2.377 22.421 22.421 qs_vxc_create 45 10.4 0.001 0.001 20.831 20.831 xc_vxc_pw_create 45 11.4 4.189 4.189 20.830 20.830 fist_calc_energy_force 6 3.8 0.002 0.002 10.936 10.936 qs_rho_update_rho 45 7.9 0.000 0.000 10.425 10.425 calculate_rho_elec 45 8.9 0.885 0.885 10.424 10.424 pw_scatter_s 429 15.4 10.231 10.231 10.231 10.231 force_nonbond 6 4.8 9.710 9.710 9.710 9.710 xc_rho_set_and_dset_create 45 12.4 0.247 0.247 9.571 9.571 fft3d_s 802 15.0 8.965 8.965 8.976 8.976 qmmm_forces 6 3.8 0.001 0.001 8.063 8.063 qmmm_forces_with_gaussian 6 4.8 0.127 0.127 7.552 7.552 init_scf_loop 6 6.8 0.000 0.000 7.509 7.509 pw_integral_ab 2539 7.4 7.360 7.360 7.360 7.360 qs_ks_ddapc 45 10.4 0.001 0.001 6.498 6.498 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.484 6.484 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.456 5.456 qmmm_forces_gaussian_low_G 6 6.8 5.416 5.416 5.416 5.416 pw_poisson_solve 51 9.9 2.241 2.241 5.169 5.169 grid_collocate_task_list 45 9.9 4.869 4.869 4.869 4.869 density_rs2pw 45 9.9 0.003 0.003 4.670 4.670 sum_up_and_integrate 45 10.4 0.235 0.235 4.344 4.344 integrate_v_rspace 45 11.4 0.014 0.014 4.109 4.109 cp_ddapc_apply_CD 45 11.4 0.006 0.006 4.028 4.028 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.036 87.633 87.635 qs_mol_dyn_low 1 2.0 0.005 0.005 86.034 86.128 qs_forces 6 3.8 0.001 0.001 63.945 63.945 qs_energies 6 4.8 0.001 0.001 60.976 60.976 scf_env_do_scf 6 5.8 0.000 0.001 59.423 59.423 scf_env_do_scf_inner_loop 113 6.2 0.003 0.009 57.029 57.030 rebuild_ks_matrix 119 8.1 0.000 0.000 42.171 42.190 qs_ks_build_kohn_sham_matrix 119 9.1 0.022 0.024 42.171 42.189 qs_ks_update_qs_env 119 7.3 0.001 0.001 39.655 39.673 velocity_verlet 5 3.0 0.002 0.003 35.751 35.756 pw_transfer 2446 11.8 0.286 0.317 27.057 27.199 fft_wrap_pw1pw2 2059 12.8 0.034 0.037 26.217 26.385 fft_wrap_pw1pw2_150 1321 14.0 2.289 2.517 25.419 25.599 qs_vxc_create 119 10.1 0.003 0.004 21.426 21.430 xc_vxc_pw_create 119 11.1 0.448 0.640 21.422 21.426 fft3d_ps 2059 14.8 11.774 12.779 19.876 20.147 qs_rho_update_rho 119 7.3 0.001 0.001 16.617 16.618 calculate_rho_elec 119 8.3 0.087 0.096 16.616 16.617 sum_up_and_integrate 119 10.1 0.089 0.096 15.041 15.088 integrate_v_rspace 119 11.1 0.004 0.005 14.953 15.005 qmmm_forces 6 3.8 0.003 0.003 12.259 12.260 qmmm_forces_with_gaussian 6 4.8 0.388 0.475 11.892 12.056 rs_pw_transfer 988 11.5 0.015 0.018 11.406 11.974 xc_rho_set_and_dset_create 119 12.1 0.505 0.596 10.057 10.592 density_rs2pw 119 9.3 0.011 0.012 10.074 10.564 potential_pw2rs 119 12.1 0.011 0.012 8.909 8.920 qmmm_el_coupling 6 3.8 0.000 0.000 8.646 8.681 qmmm_elec_with_gaussian 6 4.8 0.349 0.476 8.643 8.677 grid_collocate_task_list 119 9.3 6.224 6.629 6.224 6.629 mp_alltoall_z22v 2059 16.8 5.013 6.469 5.013 6.469 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.804 5.917 grid_integrate_task_list 119 12.1 5.626 5.883 5.626 5.883 rs_pw_transfer_PW2RS_150 125 13.9 2.545 2.616 4.973 5.021 qmmm_forces_gaussian_low_G 6 6.8 4.755 4.854 4.755 4.854 rs_pw_transfer_RS2PW_150 125 11.2 2.042 2.239 4.150 4.704 x_to_yz 1095 16.3 1.864 2.043 4.441 4.703 yz_to_x 964 15.3 1.175 1.370 3.611 4.590 pw_restrict_s3 18 5.8 2.370 2.414 4.467 4.573 mp_waitany 4028 12.8 3.594 4.511 3.594 4.511 qs_scf_new_mos 113 7.2 0.001 0.001 3.718 3.727 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.718 3.726 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.570 3.607 pw_prolongate_s3 18 6.8 1.889 1.921 3.569 3.607 ot_scf_mini 113 9.2 0.002 0.002 3.554 3.565 dbcsr_multiply_generic 2588 12.3 0.099 0.114 3.413 3.488 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.388 3.458 qs_ks_ddapc 119 10.1 0.003 0.003 2.923 3.076 pw_integral_ab 2761 7.7 2.223 2.265 2.611 2.800 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.527 2.528 qmmm_elec_gaussian_low_G 6 6.8 2.441 2.505 2.441 2.505 mp_sum_dm3 33 5.7 2.369 2.481 2.369 2.481 init_scf_loop 6 6.8 0.000 0.000 2.391 2.391 ot_mini 113 10.2 0.001 0.001 2.247 2.259 pw_gather_p 964 14.3 1.995 2.177 1.995 2.177 mp_waitall_1 188862 16.2 1.974 2.148 1.974 2.148 pw_scatter_p 1095 15.3 1.956 2.060 1.956 2.060 pw_derive 732 12.5 1.718 1.867 1.718 1.867 qs_ot_get_derivative 113 11.2 0.001 0.001 1.774 1.782 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=34.789, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=66.053, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.231, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=9.71, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.965, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=7.36, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.416, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.869, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=49.577, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.441, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=2.223, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.755, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.224, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=11.774, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.013, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.626, yerr=0.0 Summary: Performance test took 61 minutes. Status: OK Removing intermediate container 44340e0ffe5c ---> 1f332c373181 Step 41/41 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 44132ac8526e Removing intermediate container 44132ac8526e ---> 31a58fa6ec4a [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 31a58fa6ec4a Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-b51:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-03-07 12:56:08+00:00