StartDate: 2022-03-14 19:03:06+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: 820d80c352ea2fde9d9e3a20ea6232d72d631dfb CommitTime: 2022-03-14 17:15:56 +0100 CommitAuthor: abussy CommitSubject: RI_HFX| much better sparsity management in RHO flavor forces Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=820d80c352ea2fde9d9e3a20ea6232d72d631dfb Sending build context to Docker daemon 362.4MB Step 1/41 : FROM ubuntu:20.04 20.04: Pulling from library/ubuntu 7c3b88808835: Already exists Digest: sha256:8ae9bafbb64f63a50caab98fd3a5e37b3eb837a3e0780b78e5218e63193961f9 Status: Downloaded newer image for ubuntu:20.04 ---> 2b4cba85892a Step 2/41 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 2478d1e60fc2 Step 3/41 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> f04a754c1b4e Step 4/41 : RUN ./install_requirements.sh ubuntu:20.04 ---> Using cache ---> b7c4b3bf2ba3 Step 5/41 : RUN mkdir scripts ---> Using cache ---> 0be4797ba2b9 Step 6/41 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 20eb19422415 Step 7/41 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> bf33731cd10d Step 8/41 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --dry-run ---> Using cache ---> 19b0b3c97393 Step 9/41 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 9fcf7ef497eb Step 10/41 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 5196fa37023c Step 11/41 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> dd293eb974d0 Step 12/41 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> e76392d58f21 Step 13/41 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> a0c151ba1669 Step 14/41 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 89cd0e6c3bff Step 15/41 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 3f34682c7ad5 Step 16/41 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> f53b7bbcf5ed Step 17/41 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 6059648341ff Step 18/41 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> a38ee222da95 Step 19/41 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 033717c35479 Step 20/41 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> ce105c9308f4 Step 21/41 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> da58aaf5626a Step 22/41 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 88850f0a7d75 Step 23/41 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 6180e9da3590 Step 24/41 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> b03deb66bd99 Step 25/41 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> ea28303902dd Step 26/41 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 01ae9c180998 Step 27/41 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 0532b5422729 Step 28/41 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 78e52c29e47b Step 29/41 : WORKDIR /opt/cp2k ---> Using cache ---> 84f01a434462 Step 30/41 : COPY ./Makefile . ---> Using cache ---> 6f3765799877 Step 31/41 : COPY ./src ./src ---> 6623f8db27bd Step 32/41 : COPY ./exts ./exts ---> 3a050ff45994 Step 33/41 : COPY ./tools/build_utils ./tools/build_utils ---> 17213d57e1bb Step 34/41 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true )" ---> Running in 026f3362b87f './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container 026f3362b87f ---> d79af7727eeb Step 35/41 : COPY ./data ./data ---> dbb145520018 Step 36/41 : COPY ./tests ./tests ---> 95072d7375bf Step 37/41 : COPY ./tools/regtesting ./tools/regtesting ---> c37ab66c3301 Step 38/41 : COPY ./benchmarks ./benchmarks ---> bfa1723a797a Step 39/41 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 0138cc86b3a8 Step 40/41 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 8c4f43dcf59a ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.034 169.321 169.321 qs_mol_dyn_low 1 2.0 0.004 0.004 168.498 168.498 qs_forces 11 3.9 0.002 0.002 168.440 168.440 qs_energies 11 4.9 0.001 0.001 158.185 158.185 scf_env_do_scf 11 5.9 0.001 0.001 125.454 125.454 velocity_verlet 10 3.0 0.002 0.002 118.233 118.233 scf_env_do_scf_inner_loop 108 6.5 0.010 0.010 85.237 85.237 init_scf_loop 11 6.9 0.000 0.000 40.020 40.020 prepare_preconditioner 11 7.9 0.000 0.000 36.021 36.021 make_preconditioner 11 8.9 0.000 0.000 36.021 36.021 rebuild_ks_matrix 119 8.3 0.001 0.001 34.077 34.077 qs_ks_build_kohn_sham_matrix 119 9.3 0.021 0.021 34.076 34.076 make_full_inverse_cholesky 11 9.9 0.000 0.000 33.998 33.998 qs_ks_update_qs_env 119 7.6 0.001 0.001 31.925 31.925 qs_rho_update_rho 119 7.7 0.001 0.001 29.864 29.864 calculate_rho_elec 119 8.7 1.568 1.568 29.863 29.863 qs_scf_new_mos 108 7.5 0.001 0.001 29.592 29.592 qs_scf_loop_do_ot 108 8.5 0.001 0.001 29.591 29.591 ot_scf_mini 108 9.5 0.004 0.004 27.525 27.525 dbcsr_multiply_generic 2286 12.5 0.202 0.202 25.750 25.750 grid_collocate_task_list 119 9.7 23.220 23.220 23.220 23.220 sum_up_and_integrate 119 10.3 0.414 0.414 21.046 21.046 integrate_v_rspace 119 11.3 0.588 0.588 20.632 20.632 cp_fm_cholesky_invert 11 10.9 20.490 20.490 20.490 20.490 grid_integrate_task_list 119 12.3 17.280 17.280 17.280 17.280 init_scf_run 11 5.9 0.001 0.001 16.838 16.838 scf_env_initial_rho_setup 11 6.9 0.001 0.001 16.837 16.837 ot_mini 108 10.5 0.001 0.001 16.602 16.602 wfi_extrapolate 11 7.9 0.001 0.001 15.975 15.975 cp_gemm 81 9.0 0.000 0.000 15.457 15.457 cp_gemm_cosma 81 10.0 15.456 15.456 15.456 15.456 make_m2s 4572 13.5 0.072 0.072 14.614 14.614 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.796 10.796 pw_transfer 1439 11.6 0.103 0.103 8.639 8.639 qs_ot_get_derivative 108 11.5 0.002 0.002 8.421 8.421 fft_wrap_pw1pw2 1201 12.6 0.011 0.011 8.298 8.298 ot_diis_step 108 11.5 0.006 0.006 8.177 8.177 make_images 4572 14.5 2.771 2.771 7.721 7.721 cp_fm_cholesky_decompose 22 10.9 7.264 7.264 7.264 7.264 fft_wrap_pw1pw2_140 487 13.2 0.888 0.888 7.082 7.082 dbcsr_make_dense_low 5837 15.5 0.102 0.102 7.063 7.063 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.955 6.955 apply_single 119 13.6 0.001 0.001 6.954 6.954 make_dense_data 5837 16.5 6.236 6.236 6.936 6.936 qs_ot_get_p 119 10.4 0.001 0.001 6.482 6.482 dbcsr_make_images_dense 3978 14.8 0.029 0.029 6.403 6.403 dbcsr_complete_redistribute 329 12.2 3.057 3.057 6.365 6.365 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.218 6.218 multiply_cannon 2286 13.5 1.062 1.062 6.020 6.020 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.973 5.973 dbcsr_copy 2102 12.0 0.301 0.301 5.533 5.533 qs_create_task_list 11 7.9 0.000 0.000 5.423 5.423 generate_qs_task_list 11 8.9 3.736 3.736 5.423 5.423 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.215 5.215 dbcsr_copy_into_existing 22 7.9 5.183 5.183 5.184 5.184 density_rs2pw 119 9.7 0.007 0.007 5.074 5.074 pw_poisson_solve 119 10.3 2.183 2.183 4.956 4.956 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.927 4.927 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.927 4.927 qs_ot_p2m_diag 50 11.0 0.223 0.223 4.819 4.819 multiply_cannon_loop 2286 14.5 0.052 0.052 4.368 4.368 multiply_cannon_multrec 2286 15.5 4.231 4.231 4.314 4.314 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.308 4.308 cp_dbcsr_syevd 50 12.0 0.005 0.005 4.228 4.228 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.227 4.227 cp_fm_diag_elpa 50 13.0 0.001 0.001 4.051 4.051 cp_fm_diag_elpa_base 50 14.0 3.994 3.994 4.050 4.050 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.033 4.033 fft3d_s 1202 14.6 3.612 3.612 3.619 3.619 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.014 75.087 75.088 qs_mol_dyn_low 1 2.0 0.007 0.008 74.946 74.952 qs_forces 11 3.9 0.002 0.002 74.887 74.888 qs_energies 11 4.9 0.001 0.002 69.769 69.771 scf_env_do_scf 11 5.9 0.001 0.001 62.875 62.876 scf_env_do_scf_inner_loop 108 6.5 0.003 0.011 58.296 58.297 velocity_verlet 10 3.0 0.002 0.002 44.947 44.948 rebuild_ks_matrix 119 8.3 0.001 0.001 29.438 29.477 qs_ks_build_kohn_sham_matrix 119 9.3 0.023 0.025 29.437 29.476 qs_ks_update_qs_env 119 7.6 0.001 0.001 26.178 26.214 sum_up_and_integrate 119 10.3 0.045 0.048 22.841 22.900 integrate_v_rspace 119 11.3 0.005 0.005 22.796 22.855 qs_rho_update_rho 119 7.7 0.001 0.001 22.504 22.515 calculate_rho_elec 119 8.7 0.048 0.050 22.503 22.514 dbcsr_multiply_generic 2286 12.5 0.134 0.139 17.860 18.119 grid_integrate_task_list 119 12.3 15.864 16.753 15.864 16.753 grid_collocate_task_list 119 9.7 15.638 16.679 15.638 16.679 qs_scf_new_mos 108 7.5 0.001 0.001 14.347 14.375 qs_scf_loop_do_ot 108 8.5 0.001 0.001 14.346 14.375 ot_scf_mini 108 9.5 0.004 0.004 13.463 13.490 multiply_cannon 2286 13.5 0.230 0.234 12.002 12.426 multiply_cannon_loop 2286 14.5 0.227 0.235 10.931 11.372 mp_waitall_1 169478 16.3 9.340 9.500 9.340 9.500 rs_pw_transfer 974 11.9 0.018 0.019 7.323 8.299 ot_mini 108 10.5 0.001 0.001 7.905 7.932 density_rs2pw 119 9.7 0.010 0.010 6.217 7.215 multiply_cannon_metrocomm3 18288 15.5 0.082 0.085 6.097 6.635 pw_transfer 1439 11.6 0.137 0.146 6.239 6.316 fft_wrap_pw1pw2 1201 12.6 0.015 0.016 5.930 6.017 potential_pw2rs 119 12.3 0.010 0.011 5.616 5.628 fft_wrap_pw1pw2_140 487 13.2 0.611 0.628 5.171 5.350 init_scf_run 11 5.9 0.000 0.002 4.751 4.751 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.751 4.751 init_scf_loop 11 6.9 0.000 0.001 4.560 4.560 fft3d_ps 1201 14.6 2.426 2.581 4.412 4.498 wfi_extrapolate 11 7.9 0.001 0.001 4.356 4.356 make_m2s 4572 13.5 0.077 0.079 4.068 4.138 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.039 4.117 apply_single 119 13.6 0.001 0.001 4.039 4.116 ot_diis_step 108 11.5 0.005 0.006 4.107 4.107 qs_ot_get_derivative 108 11.5 0.001 0.002 3.769 3.796 multiply_cannon_multrec 18288 15.5 3.580 3.650 3.598 3.668 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.496 3.502 mp_waitany 9880 13.7 2.489 3.491 2.489 3.491 make_images 4572 14.5 0.190 0.194 3.362 3.439 rs_pw_transfer_RS2PW_140 130 11.5 0.600 0.649 2.267 3.262 rs_pw_transfer_PW2RS_140 130 13.9 1.373 1.418 2.838 2.874 mp_alltoall_d11v 2130 13.8 1.663 2.416 1.663 2.416 rs_gather_matrices 119 12.3 0.136 0.151 1.254 2.011 qs_ot_get_p 119 10.4 0.001 0.001 1.815 1.864 make_images_data 4572 15.5 0.063 0.069 1.631 1.750 cp_gemm 81 9.0 0.000 0.000 1.665 1.671 cp_gemm_cosma 81 10.0 1.664 1.670 1.664 1.670 hybrid_alltoall_any 4725 16.4 0.126 0.429 1.446 1.583 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.452 1.583 qs_energies_init_hamiltonians 11 5.9 0.001 0.001 1.507 1.508 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=81.38000000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.22, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=20.49, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=17.28, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.456, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.264, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.231, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=26.512, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.638, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.864, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.664, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.58, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.489, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=9.34, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 225.125 225.125 qs_mol_dyn_low 1 2.0 0.004 0.004 224.256 224.256 qs_forces 11 3.9 0.002 0.002 224.194 224.194 qs_energies 11 4.9 0.001 0.001 209.863 209.863 scf_env_do_scf 11 5.9 0.001 0.001 172.015 172.015 velocity_verlet 10 3.0 0.002 0.002 151.934 151.934 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 127.906 127.906 rebuild_ks_matrix 107 8.3 0.001 0.001 62.991 62.991 qs_ks_build_kohn_sham_matrix 107 9.3 0.020 0.020 62.990 62.990 qs_ks_update_qs_env 107 7.6 0.001 0.001 57.045 57.045 qs_rho_update_rho 107 7.7 0.001 0.001 56.746 56.746 calculate_rho_elec 107 8.7 1.411 1.411 56.745 56.745 sum_up_and_integrate 107 10.3 0.372 0.372 51.398 51.398 integrate_v_rspace 107 11.3 0.480 0.480 51.026 51.026 grid_collocate_task_list 107 9.7 50.942 50.942 50.942 50.942 grid_integrate_task_list 107 12.3 48.109 48.109 48.109 48.109 init_scf_loop 11 6.9 0.000 0.000 43.926 43.926 prepare_preconditioner 11 7.9 0.000 0.000 36.641 36.641 make_preconditioner 11 8.9 0.000 0.000 36.641 36.641 make_full_inverse_cholesky 11 9.9 0.000 0.000 34.608 34.608 qs_scf_new_mos 96 7.5 0.001 0.001 26.746 26.746 qs_scf_loop_do_ot 96 8.5 0.001 0.001 26.746 26.746 ot_scf_mini 96 9.5 0.003 0.003 24.953 24.953 dbcsr_multiply_generic 1966 12.4 0.180 0.180 23.116 23.116 cp_fm_cholesky_invert 11 10.9 20.447 20.447 20.447 20.447 init_scf_run 11 5.9 0.001 0.001 20.299 20.299 scf_env_initial_rho_setup 11 6.9 0.001 0.001 20.298 20.298 wfi_extrapolate 11 7.9 0.001 0.001 19.141 19.141 cp_gemm 81 9.0 0.000 0.000 15.824 15.824 cp_gemm_cosma 81 10.0 15.824 15.824 15.824 15.824 ot_mini 96 10.5 0.001 0.001 14.957 14.957 make_m2s 3932 13.4 0.064 0.064 13.370 13.370 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 12.355 12.355 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.682 7.682 cp_fm_cholesky_decompose 22 10.9 7.655 7.655 7.655 7.655 qs_ot_get_derivative 96 11.5 0.002 0.002 7.558 7.558 pw_transfer 1295 11.6 0.096 0.096 7.539 7.539 ot_diis_step 96 11.5 0.005 0.005 7.396 7.396 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.351 7.351 fft_wrap_pw1pw2 1081 12.6 0.010 0.010 7.229 7.229 make_images 3932 14.4 2.578 2.578 7.061 7.061 qs_create_task_list 11 7.9 0.000 0.000 6.758 6.758 generate_qs_task_list 11 8.9 5.027 5.027 6.758 6.758 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.645 6.645 dbcsr_complete_redistribute 317 12.2 3.171 3.171 6.534 6.534 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 6.509 6.509 apply_single 107 13.6 0.001 0.001 6.509 6.509 dbcsr_make_dense_low 4961 15.5 0.090 0.090 6.443 6.443 make_dense_data 4961 16.5 5.725 5.725 6.332 6.332 fft_wrap_pw1pw2_140 439 13.2 0.592 0.592 6.130 6.130 qs_ot_get_p 107 10.4 0.001 0.001 6.063 6.063 dbcsr_make_images_dense 3386 14.7 0.025 0.025 5.871 5.871 dbcsr_copy 1855 11.9 0.277 0.277 5.835 5.835 dbcsr_copy_into_existing 22 7.9 5.513 5.513 5.514 5.514 copy_dbcsr_to_fm 147 11.2 0.004 0.004 5.503 5.503 multiply_cannon 1966 13.4 0.865 0.865 5.343 5.343 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 5.021 5.021 calculate_w_matrix_ot 11 6.9 0.009 0.009 5.021 5.021 qs_ot_p2m_diag 44 11.0 0.194 0.194 4.646 4.646 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.575 4.575 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.013 133.199 133.200 qs_mol_dyn_low 1 2.0 0.005 0.005 133.063 133.069 qs_forces 11 3.9 0.002 0.002 133.006 133.006 qs_energies 11 4.9 0.001 0.002 123.947 123.949 scf_env_do_scf 11 5.9 0.001 0.001 113.818 113.819 scf_env_do_scf_inner_loop 96 6.5 0.003 0.010 105.699 105.699 velocity_verlet 10 3.0 0.002 0.002 79.928 79.930 rebuild_ks_matrix 107 8.3 0.001 0.001 60.487 60.554 qs_ks_build_kohn_sham_matrix 107 9.3 0.022 0.023 60.487 60.553 sum_up_and_integrate 107 10.3 0.045 0.049 54.233 54.288 integrate_v_rspace 107 11.3 0.005 0.005 54.188 54.246 qs_ks_update_qs_env 107 7.6 0.001 0.001 53.260 53.323 qs_rho_update_rho 107 7.7 0.001 0.001 50.398 50.410 calculate_rho_elec 107 8.7 0.043 0.044 50.397 50.409 grid_integrate_task_list 107 12.3 46.525 48.492 46.525 48.492 grid_collocate_task_list 107 9.7 42.960 45.070 42.960 45.070 dbcsr_multiply_generic 1966 12.4 0.121 0.125 16.866 17.140 qs_scf_new_mos 96 7.5 0.001 0.001 13.316 13.363 qs_scf_loop_do_ot 96 8.5 0.001 0.001 13.315 13.362 ot_scf_mini 96 9.5 0.003 0.003 12.502 12.549 multiply_cannon 1966 13.4 0.200 0.205 11.442 11.633 multiply_cannon_loop 1966 14.4 0.208 0.224 10.431 10.672 rs_pw_transfer 878 11.9 0.016 0.018 7.988 9.402 mp_waitall_1 146670 16.2 8.901 9.203 8.901 9.203 density_rs2pw 107 9.7 0.009 0.009 6.834 8.250 init_scf_loop 11 6.9 0.000 0.001 8.100 8.100 init_scf_run 11 5.9 0.000 0.002 7.932 7.932 scf_env_initial_rho_setup 11 6.9 0.000 0.001 7.932 7.932 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.446 7.452 ot_mini 96 10.5 0.001 0.001 7.373 7.424 wfi_extrapolate 11 7.9 0.001 0.001 7.309 7.309 multiply_cannon_metrocomm3 15728 15.4 0.074 0.077 5.867 6.234 pw_transfer 1295 11.6 0.126 0.134 5.923 6.006 fft_wrap_pw1pw2 1081 12.6 0.014 0.015 5.635 5.711 potential_pw2rs 107 12.3 0.009 0.010 5.360 5.370 fft_wrap_pw1pw2_140 439 13.2 0.567 0.583 4.922 5.091 mp_waitany 8968 13.7 3.515 4.928 3.515 4.928 rs_pw_transfer_RS2PW_140 118 11.5 0.458 0.490 3.132 4.560 fft3d_ps 1081 14.6 2.341 2.502 4.216 4.281 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.871 3.926 apply_single 107 13.6 0.001 0.001 3.870 3.926 mp_alltoall_d11v 1998 13.7 2.617 3.910 2.617 3.910 ot_diis_step 96 11.5 0.005 0.005 3.874 3.874 make_m2s 3932 13.4 0.068 0.071 3.769 3.824 multiply_cannon_multrec 15728 15.4 3.384 3.506 3.401 3.523 qs_ot_get_derivative 96 11.5 0.001 0.001 3.467 3.516 rs_gather_matrices 107 12.3 0.143 0.156 2.241 3.504 make_images 3932 14.4 0.169 0.172 3.126 3.180 rs_pw_transfer_PW2RS_140 118 13.9 1.376 1.422 2.835 2.867 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=82.148, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=50.942, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.109, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=20.447, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.824, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.655, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=27.914000000000016, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=42.96, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.525, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.515, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.901, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.384, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.463 0.463 285.354 285.354 qs_energies 1 2.0 0.000 0.000 284.018 284.018 scf_env_do_scf 1 3.0 0.000 0.000 281.512 281.512 qs_ks_update_qs_env 8 5.0 0.000 0.000 263.510 263.510 rebuild_ks_matrix 7 6.0 0.000 0.000 263.394 263.394 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 263.394 263.394 hfx_ks_matrix 7 8.0 0.000 0.000 175.093 175.093 integrate_four_center 7 9.0 2.091 2.091 175.058 175.058 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 165.334 165.334 integrate_four_center_main 7 10.0 1.729 1.729 161.729 161.729 integrate_four_center_bin 451 11.0 160.001 160.001 160.001 160.001 init_scf_loop 1 4.0 0.000 0.000 116.158 116.158 cp_gemm 129 10.3 0.001 0.001 73.294 73.294 cp_gemm_cosma 129 11.3 73.293 73.293 73.293 73.293 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 41.821 41.821 admm_fit_mo_coeffs 7 9.0 0.000 0.000 40.013 40.013 admm_mo_merge_derivs 7 8.0 0.000 0.000 37.890 37.890 merge_mo_derivs_diag 7 9.0 0.022 0.022 37.890 37.890 purify_mo_diag 7 10.0 0.001 0.001 23.980 23.980 fit_mo_coeffs 7 10.0 0.000 0.000 16.033 16.033 prepare_preconditioner 1 5.0 0.000 0.000 14.017 14.017 make_preconditioner 1 6.0 0.000 0.000 14.017 14.017 integrate_four_center_load 7 10.0 0.000 0.000 10.839 10.839 hfx_load_balance 1 11.0 0.002 0.002 10.839 10.839 arnoldi_normal_ev 11 9.3 0.002 0.002 8.806 8.806 estimate_cond_num 1 7.0 0.000 0.000 8.728 8.728 build_subspace 28 9.5 0.015 0.015 8.703 8.703 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.211 0.216 190.509 190.510 qs_energies 1 2.0 0.000 0.000 190.152 190.153 scf_env_do_scf 1 3.0 0.000 0.001 189.583 189.584 qs_ks_update_qs_env 8 5.0 0.000 0.000 186.594 186.594 rebuild_ks_matrix 7 6.0 0.000 0.000 186.580 186.580 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 186.580 186.580 hfx_ks_matrix 7 8.0 0.000 0.001 174.468 174.469 integrate_four_center 7 9.0 0.109 0.416 174.452 174.452 integrate_four_center_main 7 10.0 0.005 0.005 158.693 162.340 integrate_four_center_bin 448 11.0 158.689 162.335 158.689 162.335 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 110.080 110.080 init_scf_loop 1 4.0 0.000 0.000 79.502 79.503 integrate_four_center_load 7 10.0 0.000 0.001 11.010 11.016 hfx_load_balance 1 11.0 0.001 0.001 11.010 11.016 mp_sync 70 11.3 3.890 6.837 3.890 6.837 hfx_load_balance_bin 1 12.0 5.333 5.504 5.333 5.504 hfx_load_balance_count 1 12.0 5.316 5.493 5.316 5.493 cp_gemm 129 10.3 0.000 0.001 4.902 4.908 cp_gemm_cosma 129 11.3 4.901 4.908 4.901 4.908 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=47.77699999999996, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=160.001, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=73.293, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.091, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=1.729, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.463, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=12.054999999999978, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=158.689, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=4.901, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.109, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.211, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.333, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.316, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=3.89, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 408.938 408.938 qs_energies 1 2.0 0.000 0.000 408.380 408.380 mp2_main 1 3.0 0.000 0.000 402.110 402.110 mp2_gpw_main 1 4.0 0.000 0.000 401.701 401.701 rpa_ri_compute_en 1 5.0 0.000 0.000 387.769 387.769 rpa_num_int 1 6.0 0.001 0.001 387.745 387.745 compute_mat_P_omega 1 7.0 0.002 0.002 180.453 180.453 compute_mat_P_omega_contract 10 8.0 12.439 12.439 178.839 178.839 cp_gemm 105 8.4 0.001 0.001 177.842 177.842 cp_gemm_cosma 105 9.4 177.842 177.842 177.842 177.842 dbt_total 2336 9.6 0.021 0.021 171.211 171.211 GW_matrix_operations 10 7.0 0.005 0.005 123.693 123.693 dbt_contract 787 11.0 49.932 49.932 104.810 104.810 dbt_copy 1103 10.7 21.635 21.635 65.031 65.031 compute_mat_P_omega_calc_M_occ 250 9.0 12.468 12.468 64.731 64.731 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 61.014 61.014 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 58.827 58.827 dbt_tas_total 1149 12.2 0.266 0.266 51.532 51.532 dbt_tas_multiply 807 12.1 0.005 0.005 50.193 50.193 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 39.860 39.860 dbt_tas_dbm 807 14.1 0.006 0.006 39.793 39.793 dbm_multiply 807 16.1 39.779 39.779 39.779 39.779 dbt_tas_mm_1N 524 15.1 0.002 0.002 24.293 24.293 dbt_tas_copy 574 11.4 17.219 17.219 20.663 20.663 compute_QP_energies 1 7.0 0.000 0.000 20.583 20.583 compute_self_energy_cubic_gw 1 8.0 0.104 0.104 20.583 20.583 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 18.859 18.859 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 14.927 14.927 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 13.917 13.917 dbt_tas_mm_2 251 15.0 0.002 0.002 13.718 13.718 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 13.469 13.469 dbt_tas_reserve_blocks_index 3261 13.7 7.855 7.855 13.417 13.417 dbt_copy_nocomm 251 12.0 11.699 11.699 13.334 13.334 dbt_reserve_blocks_index 2280 12.5 1.699 1.699 11.309 11.309 dbt_reserve_blocks_index_array 2222 11.6 0.013 0.013 11.268 11.268 cp_fm_cholesky_invert 10 8.0 9.029 9.029 9.029 9.029 contract_cubic_gw 21 9.0 0.000 0.000 8.764 8.764 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.009 58.733 58.733 qs_energies 1 2.0 0.001 0.001 58.604 58.604 mp2_main 1 3.0 0.000 0.001 56.964 56.965 mp2_gpw_main 1 4.0 0.000 0.001 56.901 56.902 rpa_ri_compute_en 1 5.0 0.000 0.000 54.874 54.875 rpa_num_int 1 6.0 0.001 0.001 54.866 54.867 dbt_total 2336 9.6 0.019 0.020 42.232 42.233 compute_mat_P_omega 1 7.0 0.001 0.002 41.201 41.207 compute_mat_P_omega_contract 10 8.0 0.814 0.844 40.875 40.880 dbt_contract 787 11.0 1.994 2.145 30.461 30.466 dbt_tas_total 1149 12.2 0.082 0.087 26.536 26.536 dbt_tas_multiply 807 12.1 0.003 0.003 26.444 26.446 dbt_tas_dbm 807 14.1 0.005 0.006 18.363 18.364 dbm_multiply 807 16.1 14.753 15.778 14.753 15.778 compute_mat_P_omega_calc_M_occ 250 9.0 0.788 0.826 13.611 13.612 dbt_copy 1111 10.7 4.386 4.642 10.064 10.418 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 10.039 10.040 cp_gemm 105 8.4 0.000 0.000 8.955 8.970 cp_gemm_cosma 105 9.4 8.954 8.969 8.954 8.969 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 8.346 8.347 mp_sync 8706 11.6 6.601 7.904 6.601 7.904 dbt_tas_mm_1N 524 15.1 0.002 0.003 7.358 7.892 dbt_tas_mm_2 251 15.0 0.002 0.003 7.859 7.859 GW_matrix_operations 10 7.0 0.001 0.002 5.737 5.745 compute_QP_energies 1 7.0 0.000 0.000 4.355 4.355 compute_self_energy_cubic_gw 1 8.0 0.005 0.005 4.352 4.355 dbt_communicate_buffer 1098 11.7 0.098 0.105 4.132 4.313 mp_waitall_2 3776 14.7 3.917 4.230 3.917 4.230 contract_cubic_gw 21 9.0 0.000 0.000 3.287 3.287 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.167 3.167 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 3.043 3.045 dbt_reserve_blocks_index 2849 12.4 0.114 0.123 2.675 2.982 dbt_reserve_blocks_index_array 2791 11.4 0.013 0.014 2.673 2.978 dbt_tas_reserve_blocks_index 3300 13.8 0.274 0.294 2.625 2.924 dbm_reserve_blocks 3696 14.8 2.429 2.707 2.429 2.707 dbt_tas_replicate 396 14.1 1.233 1.480 2.330 2.378 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.022 2.025 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 1.886 1.892 convert_to_new_pgrid 2421 14.1 0.041 0.047 1.758 1.871 dbm_copy 1608 15.1 1.706 1.821 1.706 1.821 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.670 1.674 scf_env_do_scf 1 3.0 0.000 0.000 1.578 1.578 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 1.578 1.578 mp_max_i 1992 9.8 1.043 1.339 1.043 1.339 dbm_add 807 14.1 1.134 1.194 1.134 1.194 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=102.531, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=177.842, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_contract", label="dbt_contract", y=49.932, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=39.779, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_copy", label="dbt_copy", y=21.635, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=17.219, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=18.127999999999993, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=8.954, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_contract", label="dbt_contract", y=1.994, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=14.753, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_copy", label="dbt_copy", y=4.386, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.601, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.917, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.021 0.021 761.829 761.829 qs_forces 1 2.0 0.000 0.000 760.929 760.929 rebuild_ks_matrix 7 6.6 0.000 0.000 751.214 751.214 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 751.214 751.214 hfx_ks_matrix 7 8.6 0.000 0.000 748.368 748.368 dbt_total 4858 11.6 0.055 0.055 640.762 640.762 hfx_ri_update_ks 7 9.6 0.000 0.000 524.170 524.170 hfx_ri_update_ks_Pmat 7 10.6 69.740 69.740 524.165 524.165 qs_energies 1 3.0 0.000 0.000 467.851 467.851 scf_env_do_scf 1 4.0 0.000 0.000 467.365 467.365 qs_ks_update_qs_env 8 6.0 0.000 0.000 458.242 458.242 dbt_contract 1473 13.0 192.329 192.329 425.044 425.044 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 292.978 292.978 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 271.575 271.575 dbt_tas_total 2391 14.1 2.040 2.040 250.628 250.628 hfx_ri_update_forces 1 7.0 0.000 0.000 224.192 224.192 dbt_tas_multiply 1482 14.0 0.006 0.006 217.651 217.651 init_scf_loop 2 5.0 0.000 0.000 195.787 195.787 hfx_ri_update_ks_Pmat_KS 567 11.6 0.007 0.007 184.319 184.319 dbt_copy 2330 12.4 93.556 93.556 182.664 182.664 dbt_tas_dbm 1482 16.0 0.010 0.010 175.655 175.655 dbm_multiply 1482 18.0 175.627 175.627 175.627 175.627 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.003 0.003 150.001 150.001 hfx_ri_forces_Pmat_3c 1 8.0 0.003 0.003 131.444 131.444 dbt_tas_mm_2 649 17.1 0.007 0.007 119.024 119.024 precalc_derivatives 1 8.0 0.011 0.011 72.919 72.919 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 53.419 53.419 dbt_tas_mm_3T 659 17.1 0.003 0.003 44.738 44.738 dbt_tas_reserve_blocks_index 7234 15.4 19.406 19.406 36.792 36.792 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.002 0.002 36.330 36.330 dbt_tas_copy 1474 13.1 27.540 27.540 35.316 35.316 dbt_tas_reshape 906 14.4 0.025 0.025 33.960 33.960 dbt_reserve_blocks_index 4836 14.6 4.113 4.113 31.421 31.421 dbt_reserve_blocks_index_array 4801 13.6 0.035 0.035 31.155 31.155 build_3c_derivatives 9 9.0 4.065 4.065 30.136 30.136 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.001 0.001 21.717 21.717 dbt_tas_reshape_buffer_obtain 906 15.4 13.906 13.906 20.020 20.020 dbt_split_copyback 87 12.2 14.375 14.375 16.159 16.159 dbt_split_blocks_generic 174 12.2 12.243 12.243 15.369 15.369 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.011 109.520 109.521 qs_forces 1 2.0 0.000 0.000 109.238 109.238 rebuild_ks_matrix 7 6.6 0.000 0.000 108.069 108.070 qs_ks_build_kohn_sham_matrix 7 7.6 0.003 0.003 108.069 108.070 hfx_ks_matrix 7 8.6 0.000 0.001 106.246 106.264 dbt_total 4858 11.6 0.042 0.047 96.152 96.155 dbt_contract 1473 13.0 6.461 7.729 72.617 72.634 dbt_tas_total 2391 14.1 0.333 0.547 69.590 69.591 dbt_tas_multiply 1482 14.0 0.007 0.008 60.167 60.172 hfx_ri_update_ks 7 9.6 0.000 0.000 57.547 57.547 hfx_ri_update_ks_Pmat 7 10.6 3.055 4.271 57.545 57.545 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 56.091 56.091 qs_energies 1 3.0 0.000 0.000 53.128 53.129 scf_env_do_scf 1 4.0 0.000 0.000 52.907 52.907 qs_ks_update_qs_env 8 6.0 0.000 0.000 51.979 51.980 hfx_ri_update_forces 1 7.0 0.001 0.001 48.698 48.716 dbt_tas_dbm 1482 16.0 0.009 0.010 41.484 41.485 hfx_ri_forces_Pmat_3c 1 8.0 0.003 0.004 39.796 39.809 dbm_multiply 1482 18.0 27.574 33.573 27.574 33.573 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 30.687 30.687 mp_sync 17507 13.6 23.170 27.965 23.170 27.965 hfx_ri_update_ks_Pmat_KS 567 11.6 0.006 0.007 26.177 26.177 dbt_tas_mm_2 649 17.1 0.007 0.007 22.608 22.609 init_scf_loop 2 5.0 0.000 0.000 22.219 22.219 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.003 0.003 14.158 14.159 dbt_copy 2352 12.4 3.841 4.083 11.118 12.034 dbt_tas_mm_3T 659 17.1 0.003 0.004 7.253 7.834 dbt_tas_merge 649 14.1 3.464 4.749 6.230 7.796 precalc_derivatives 1 8.0 0.003 0.003 7.188 7.188 mp_waitall_2 6131 16.2 5.876 6.483 5.876 6.483 dbt_tas_mm_3N 163 16.5 0.001 0.001 6.089 6.239 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 6.061 6.062 dbt_tas_reserve_blocks_index 7352 15.5 0.679 0.813 4.218 5.175 dbt_tas_communicate_buffer 1908 16.3 0.111 0.117 4.540 5.055 dbt_tas_reshape 999 14.5 0.020 0.022 4.451 4.883 dbm_reserve_blocks 8261 16.3 3.932 4.780 3.932 4.780 dbt_tas_replicate 909 15.6 1.405 1.463 4.536 4.645 dbt_reserve_blocks_index 5240 14.5 0.205 0.235 3.524 4.226 convert_to_new_pgrid 4446 16.0 0.080 0.169 3.351 4.217 dbt_reserve_blocks_index_array 5205 13.5 0.023 0.024 3.517 4.216 mp_max_i 3372 12.5 3.260 4.159 3.260 4.159 dbm_copy 3041 16.9 3.109 4.032 3.109 4.032 mp_sum_l 38121 15.3 3.219 4.008 3.219 4.008 mp_alltoall_i 3170 15.2 3.452 3.924 3.452 3.924 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.001 3.502 3.510 build_3c_derivatives 9 9.0 0.620 0.668 3.478 3.481 dbm_add 1482 16.0 2.231 3.184 2.231 3.184 dbt_communicate_buffer 1260 13.5 0.071 0.077 2.489 2.741 dbt_tas_merge_communicate_buff 649 15.1 0.013 0.014 2.186 2.624 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.003 0.003 2.599 2.600 dbt_tas_reshape_alltoall 999 15.5 0.002 0.003 2.048 2.467 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=203.03700000000003, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_contract", label="dbt_contract", y=192.329, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=175.627, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_copy", label="dbt_copy", y=93.556, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=69.74, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=27.54, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=35.61099999999999, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_contract", label="dbt_contract", y=6.461, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=27.574, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_copy", label="dbt_copy", y=3.841, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=3.055, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=5.876, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.932, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=23.17, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 277.375 277.375 qs_energies 1 2.0 0.000 0.000 277.110 277.110 mp2_main 1 3.0 0.000 0.000 262.287 262.287 mp2_gpw_main 1 4.0 0.002 0.002 260.983 260.983 mp2_ri_gpw_compute_in 1 5.0 0.988 0.988 188.158 188.158 mp2_ri_gpw_compute_in_loop 1 6.0 0.039 0.039 153.884 153.884 mp2_eri_3c_integrate_gpw 2656 7.0 0.040 0.040 112.061 112.061 integrate_v_rspace 2666 8.0 1.130 1.130 90.717 90.717 grid_integrate_task_list 2666 9.0 86.123 86.123 86.123 86.123 mp2_ri_gpw_compute_en 1 5.0 0.056 0.056 72.765 72.765 mp2_ri_gpw_compute_en_RI_loop 1 6.0 35.815 35.815 68.727 68.727 calculate_wavefunction 5312 9.0 28.642 28.642 38.889 38.889 get_2c_integrals 1 6.0 0.000 0.000 33.221 33.221 compute_2c_integrals 1 7.0 0.013 0.013 30.964 30.964 compute_2c_integrals_loop_lm 1 8.0 0.029 0.029 30.915 30.915 mp2_eri_2c_integrate_gpw 1 9.0 5.703 5.703 30.886 30.886 dbcsr_multiply_generic 5322 8.0 0.371 0.371 25.617 25.617 ao_to_mo_and_store_B_mult_1 2656 7.0 0.029 0.029 25.577 25.577 mp2_ri_gpw_compute_en_expansio 2080 7.0 3.915 3.915 21.313 21.313 pw_transfer 63872 10.6 2.271 2.271 18.013 18.013 offload_gemm 2080 8.0 17.398 17.398 17.398 17.398 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 10.647 10.647 16.012 16.012 fft_wrap_pw1pw2 53228 11.4 0.249 0.249 15.385 15.385 scf_env_do_scf 1 3.0 0.000 0.000 14.092 14.092 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 14.092 14.092 multiply_cannon 5322 9.0 0.971 0.971 12.812 12.812 qs_scf_new_mos 10 5.0 0.000 0.000 11.848 11.848 mp2_ri_gpw_compute_en_ener 2080 7.0 11.587 11.587 11.587 11.587 multiply_cannon_loop 5322 10.0 0.175 0.175 10.382 10.382 fft_wrap_pw1pw2_20 21271 12.4 0.629 0.629 10.334 10.334 fft3d_s 53229 13.4 10.082 10.082 10.164 10.164 make_m2s 10644 9.0 0.110 0.110 9.287 9.287 make_images 10644 10.0 3.220 3.220 8.815 8.815 multiply_cannon_multrec 5322 11.0 8.721 8.721 8.786 8.786 eigensolver 11 5.8 0.002 0.002 8.116 8.116 cp_fm_diag_elpa 11 6.8 0.000 0.000 7.348 7.348 cp_fm_diag_elpa_base 11 7.8 7.098 7.098 7.348 7.348 potential_pw2rs 5322 10.0 0.290 0.290 6.814 6.814 copy_dbcsr_to_fm 2679 8.0 0.059 0.059 6.546 6.546 mp2_eri_2c_integrate_gpw_pot_l 2656 10.0 0.014 0.014 5.646 5.646 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.010 62.397 62.398 qs_energies 1 2.0 0.001 0.001 62.280 62.280 mp2_main 1 3.0 0.000 0.000 58.981 58.981 mp2_gpw_main 1 4.0 0.001 0.002 58.812 58.813 mp2_ri_gpw_compute_in 1 5.0 0.055 0.057 34.673 35.178 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 31.916 32.423 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 28.542 29.000 integrate_v_rspace 93 8.1 0.202 0.217 28.249 28.695 grid_integrate_task_list 93 9.1 27.664 28.085 27.664 28.085 mp2_ri_gpw_compute_en 1 5.0 0.305 0.308 24.038 24.493 mp2_ri_gpw_compute_en_RI_loop 1 6.0 2.280 2.477 20.679 21.029 mp2_ri_gpw_compute_en_expansio 65 7.0 0.239 0.290 9.304 9.665 offload_gemm 65 8.0 9.065 9.444 9.065 9.444 mp2_ri_gpw_compute_en_comm 65 7.0 4.689 5.303 8.252 8.772 mp_sendrecv_dm3 390 8.0 3.563 4.160 3.563 4.160 scf_env_do_scf 1 3.0 0.000 0.000 3.099 3.101 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 3.099 3.100 dbcsr_multiply_generic 176 8.0 0.013 0.014 2.796 2.936 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.764 2.899 get_2c_integrals 1 6.0 0.000 0.000 2.585 2.600 compute_2c_integrals 1 7.0 0.005 0.006 2.227 2.234 mp2_ri_create_group 1 6.0 0.000 0.000 2.189 2.195 replicate_iaK_2intgroup 1 7.0 1.654 1.787 2.071 2.195 compute_2c_integrals_loop_lm 1 8.0 0.002 0.003 1.991 2.105 mp2_eri_2c_integrate_gpw 1 9.0 0.434 0.473 1.989 2.103 calculate_wavefunction 166 9.0 0.876 0.912 1.633 1.689 make_m2s 352 9.0 0.005 0.005 1.476 1.569 pw_transfer 2120 10.5 0.081 0.085 1.526 1.564 make_images 352 10.0 0.066 0.069 1.462 1.555 qs_scf_new_mos 10 5.0 0.000 0.000 1.469 1.489 eigensolver 11 5.8 0.001 0.001 1.471 1.472 fft_wrap_pw1pw2 1768 11.4 0.009 0.010 1.414 1.449 multiply_cannon 176 9.0 0.023 0.024 1.251 1.319 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=97.81, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=86.123, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=35.815, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=28.642, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=17.398, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_ener", label="mp2_ri_gpw_compute_en_ener", y=11.587, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_comm", label="mp2_ri_gpw_compute_en_comm", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=14.259999999999991, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.664, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=2.28, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.876, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=9.065, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_ener", label="mp2_ri_gpw_compute_en_ener", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_comm", label="mp2_ri_gpw_compute_en_comm", y=4.689, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=3.563, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.101 0.101 200.745 200.745 qs_energies 1 2.0 0.000 0.000 198.945 198.945 scf_env_do_scf 1 3.0 0.000 0.000 188.457 188.457 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 188.456 188.456 qs_scf_new_mos 15 5.0 0.000 0.000 83.395 83.395 qs_ks_update_qs_env 15 5.0 0.000 0.000 73.080 73.080 rebuild_ks_matrix 15 6.0 0.000 0.000 72.690 72.690 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 72.690 72.690 eigensolver 15 6.0 0.002 0.002 68.809 68.809 cp_fm_diag_elpa 15 7.0 0.000 0.000 54.049 54.049 cp_fm_diag_elpa_base 15 8.0 49.196 49.196 54.048 54.048 qs_vxc_create 15 8.0 0.016 0.016 47.503 47.503 calculate_dispersion_nonloc 15 9.0 9.437 9.437 41.137 41.137 pw_transfer 1191 10.0 0.100 0.100 29.581 29.581 fft_wrap_pw1pw2 1086 11.0 0.014 0.014 29.302 29.302 qs_rho_update_rho 16 5.0 0.000 0.000 24.860 24.860 calculate_rho_elec 16 6.0 0.339 0.339 24.860 24.860 grid_collocate_task_list 16 7.0 22.976 22.976 22.976 22.976 sum_up_and_integrate 15 8.0 0.084 0.084 22.824 22.824 integrate_v_rspace 15 9.0 0.033 0.033 22.740 22.740 grid_integrate_task_list 15 10.0 22.067 22.067 22.067 22.067 fft_wrap_pw1pw2_150 765 12.0 3.555 3.555 20.818 20.818 pw_scatter_s 585 13.1 12.603 12.603 12.603 12.603 copy_dbcsr_to_fm 16 5.9 0.001 0.001 11.673 11.673 fft3d_s 1087 13.0 11.059 11.059 11.071 11.071 dbcsr_complete_redistribute 46 8.3 3.688 3.688 10.380 10.380 cp_fm_cholesky_restore 45 7.0 10.071 10.071 10.071 10.071 cp_fm_upper_to_full 30 8.0 9.539 9.539 9.539 9.539 gspace_mixing 14 5.0 0.272 0.272 8.415 8.415 vdW_energy 15 10.0 8.391 8.391 8.391 8.391 fft_wrap_pw1pw2_200 197 12.3 0.753 0.753 8.227 8.227 broyden_mixing 14 6.0 7.444 7.444 7.444 7.444 xc_vxc_pw_create 15 9.0 0.334 0.334 6.351 6.351 init_scf_run 1 3.0 0.000 0.000 4.881 4.881 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.755 4.755 dbcsr_finalize 159 9.9 0.023 0.023 4.585 4.585 dbcsr_merge_all 91 11.1 0.074 0.074 4.425 4.425 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.015 81.033 81.034 qs_energies 1 2.0 0.000 0.001 80.647 80.648 scf_env_do_scf 1 3.0 0.000 0.000 75.538 75.539 scf_env_do_scf_inner_loop 15 4.0 0.001 0.002 75.538 75.539 qs_ks_update_qs_env 15 5.0 0.000 0.000 36.701 36.730 rebuild_ks_matrix 15 6.0 0.000 0.000 36.663 36.691 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.004 36.662 36.691 sum_up_and_integrate 15 8.0 0.008 0.009 23.075 23.120 integrate_v_rspace 15 9.0 0.001 0.001 23.067 23.113 qs_rho_update_rho 16 5.0 0.000 0.000 22.947 22.949 calculate_rho_elec 16 6.0 0.011 0.012 22.947 22.949 grid_integrate_task_list 15 10.0 21.724 22.115 21.724 22.115 grid_collocate_task_list 16 7.0 21.344 21.758 21.344 21.758 qs_scf_new_mos 15 5.0 0.000 0.001 16.533 16.686 eigensolver 15 6.0 0.002 0.002 15.344 15.356 qs_vxc_create 15 8.0 0.001 0.001 13.197 13.202 cp_fm_diag_elpa 15 7.0 0.000 0.000 11.218 11.224 cp_fm_diag_elpa_base 15 8.0 10.982 11.024 11.211 11.214 calculate_dispersion_nonloc 15 9.0 1.344 1.381 10.810 10.820 pw_transfer 1191 10.0 0.106 0.113 9.316 9.404 fft_wrap_pw1pw2 1086 11.0 0.016 0.017 9.099 9.188 fft3d_ps 1086 13.0 4.031 4.149 6.964 7.162 fft_wrap_pw1pw2_150 765 12.0 0.484 0.506 6.153 6.199 cp_fm_cholesky_restore 45 7.0 3.915 3.965 3.915 3.965 qs_energies_init_hamiltonians 1 3.0 0.001 0.003 3.197 3.199 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.779 3.024 fft_wrap_pw1pw2_200 197 12.3 0.256 0.275 2.827 2.876 xc_vxc_pw_create 15 9.0 0.024 0.029 2.386 2.402 mp_alltoall_z22v 1086 15.0 1.852 2.258 1.852 2.258 vdW_energy 15 10.0 2.006 2.131 2.006 2.131 build_core_ppnl 1 5.0 1.859 2.054 1.859 2.054 rs_pw_transfer 158 9.4 0.002 0.002 1.565 1.923 density_rs2pw 16 7.0 0.001 0.002 1.480 1.813 x_to_yz 585 14.1 0.684 0.720 1.573 1.689 xc_pw_derive 90 11.0 0.002 0.002 1.550 1.622 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=72.77300000000001, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=49.196, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.976, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.067, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=12.603, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.059, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.071, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=19.037000000000006, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=10.982, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.344, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.724, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.915, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=4.031, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.091 0.091 333.844 333.844 qs_energies 1 2.0 0.000 0.000 333.677 333.677 ls_scf 1 3.0 0.000 0.000 331.754 331.754 ls_scf_main 1 4.0 0.002 0.002 316.696 316.696 density_matrix_trs4 11 5.0 0.012 0.012 177.058 177.058 ls_scf_dm_to_ks 11 5.0 0.000 0.000 132.382 132.382 matrix_ls_to_qs 11 6.0 0.000 0.000 127.882 127.882 dbcsr_multiply_generic 185 6.1 0.637 0.637 110.370 110.370 multiply_cannon 185 7.1 3.364 3.364 75.223 75.223 dbcsr_copy_into_existing 11 7.0 72.388 72.388 72.388 72.388 dbcsr_complete_redistribute 23 7.5 43.253 43.253 60.683 60.683 matrix_decluster 11 7.0 0.000 0.000 55.492 55.492 multiply_cannon_loop 185 8.1 0.430 0.430 53.443 53.443 multiply_cannon_multrec 185 9.1 51.446 51.446 51.569 51.569 arnoldi_extremal 12 6.1 0.000 0.000 46.541 46.541 arnoldi_normal_ev 12 7.1 0.028 0.028 46.541 46.541 build_subspace 23 8.1 0.137 0.137 45.725 45.725 dbcsr_matrix_vector_mult 652 9.0 0.259 0.259 35.481 35.481 dbcsr_matrix_vector_mult_local 652 10.0 33.885 33.885 33.894 33.894 make_m2s 370 7.1 0.032 0.032 28.792 28.792 make_images 370 8.1 7.106 7.106 26.429 26.429 dbcsr_finalize 646 7.5 0.213 0.213 23.096 23.096 dbcsr_merge_all 597 8.5 3.883 3.883 21.067 21.067 setup_rec_index_2d 370 8.1 18.267 18.267 18.267 18.267 dbcsr_sort_indices 1103 9.9 16.909 16.909 16.909 16.909 tree_to_linear_d 110 9.4 15.202 15.202 15.202 15.202 quick_finalize 395 10.0 0.532 0.532 14.411 14.411 ls_scf_init_scf 1 4.0 0.000 0.000 14.061 14.061 ls_scf_init_matrix_S 1 5.0 0.000 0.000 13.606 13.606 dbcsr_special_finalize 370 9.1 0.003 0.003 13.279 13.279 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 12.716 12.716 dbcsr_dot_sd 144 6.3 9.928 9.928 9.929 9.929 dbcsr_frobenius_norm 142 6.1 8.114 8.114 8.116 8.116 matrix_qs_to_ls 12 5.1 0.000 0.000 7.574 7.574 matrix_cluster 12 6.1 0.000 0.000 7.574 7.574 dbcsr_new_transposed 2 7.0 0.149 0.149 7.135 7.135 dbcsr_redistribute 2 8.0 6.877 6.877 6.943 6.943 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.011 82.191 82.192 qs_energies 1 2.0 0.000 0.000 82.083 82.083 ls_scf 1 3.0 0.000 0.000 82.000 82.001 ls_scf_main 1 4.0 0.000 0.003 78.621 78.621 density_matrix_trs4 11 5.0 0.008 0.013 74.979 75.041 dbcsr_multiply_generic 185 6.1 0.075 0.093 69.980 70.152 multiply_cannon 185 7.1 0.038 0.041 58.502 59.470 multiply_cannon_loop 185 8.1 0.183 0.190 55.379 56.794 multiply_cannon_multrec 1480 9.1 37.281 39.379 37.726 39.832 mp_waitall_1 11936 10.3 16.070 17.995 16.070 17.995 multiply_cannon_metrocomm3 1480 9.1 0.016 0.018 9.440 12.890 make_m2s 370 7.1 0.032 0.035 7.976 8.085 make_images 370 8.1 0.700 0.723 7.859 7.966 multiply_cannon_metrocomm1 1480 9.1 0.010 0.012 3.713 5.036 calculate_norms 2960 9.1 4.246 4.420 4.246 4.420 arnoldi_extremal 12 6.1 0.000 0.000 3.878 3.885 arnoldi_normal_ev 12 7.1 0.002 0.008 3.878 3.884 build_subspace 23 8.1 0.036 0.050 3.750 3.754 make_images_data 370 9.1 0.011 0.012 3.218 3.524 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.142 3.206 dbcsr_matrix_vector_mult 652 9.0 0.018 0.078 3.132 3.195 mp_sum_l 1039 5.9 2.198 3.048 2.198 3.048 dbcsr_complete_redistribute 23 7.5 1.898 1.990 2.791 2.880 matrix_ls_to_qs 11 6.0 0.000 0.000 2.766 2.855 hybrid_alltoall_any 393 9.9 0.234 1.209 2.603 2.797 ls_scf_init_scf 1 4.0 0.000 0.000 2.687 2.688 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.651 2.658 dbcsr_matrix_vector_mult_local 652 10.0 2.507 2.619 2.511 2.624 matrix_decluster 11 7.0 0.000 0.000 2.498 2.592 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.426 2.429 make_images_pack 370 9.1 2.005 2.176 2.008 2.180 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.001 1.331 2.049 dbcsr_finalize 646 7.5 0.013 0.014 1.797 1.948 dbcsr_add_d 280 6.0 0.001 0.002 1.740 1.803 dbcsr_add_anytype 280 7.0 0.921 0.975 1.739 1.802 buffer_matrices_ensure_size 370 8.1 1.527 1.644 1.527 1.644 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=114.60500000000002, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=72.388, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=51.446, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=43.253, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=33.885, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=18.267, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=17.991, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=37.281, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.898, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.507, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.246, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=16.07, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.198, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.006 103.432 103.432 lib_test 1 2.0 0.000 0.000 103.425 103.425 dbcsr_run_tests 3 3.0 0.003 0.003 103.425 103.425 test_multiplies_multiproc 3 4.0 0.001 0.001 83.262 83.262 dbcsr_redistribute 9 5.0 56.371 56.371 60.000 60.000 dbcsr_multiply_generic 9 5.0 0.001 0.001 21.423 21.423 dbcsr_make_random_matrix 9 4.0 14.400 14.400 20.073 20.073 multiply_cannon 9 6.0 0.002 0.002 14.653 14.653 multiply_cannon_loop 9 7.0 0.003 0.003 14.137 14.137 multiply_cannon_multrec 9 8.0 14.134 14.134 14.135 14.135 dbcsr_finalize 27 5.7 0.005 0.005 9.628 9.628 dbcsr_merge_all 18 6.5 3.397 3.397 8.892 8.892 tree_to_linear_d 9 7.0 3.427 3.427 3.427 3.427 mp_alltoall_d11v 27 6.0 3.301 3.301 3.301 3.301 dbcsr_data_release 975 7.6 2.423 2.423 2.423 2.423 make_m2s 18 6.0 0.001 0.001 2.207 2.207 make_images 18 7.0 0.734 0.734 2.135 2.135 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 22.088 22.089 lib_test 1 2.0 0.000 0.000 22.057 22.078 dbcsr_run_tests 3 3.0 0.000 0.001 22.055 22.077 test_multiplies_multiproc 3 4.0 0.001 0.002 20.928 20.985 dbcsr_multiply_generic 9 5.0 0.001 0.001 19.084 19.177 multiply_cannon 9 6.0 0.002 0.002 16.974 17.373 multiply_cannon_loop 9 7.0 0.003 0.003 16.617 17.006 multiply_cannon_multrec 72 8.0 13.876 14.554 13.877 14.555 mp_waitall_1 576 9.2 3.093 3.853 3.093 3.853 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 2.416 3.239 dbcsr_make_random_matrix 9 4.0 0.893 0.983 1.083 1.173 mp_sum_l 310 2.7 0.493 1.079 0.493 1.079 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.489 1.076 dbcsr_finalize 27 5.7 0.000 0.000 0.833 0.898 make_m2s 18 6.0 0.001 0.001 0.778 0.839 make_images 18 7.0 0.026 0.027 0.775 0.836 dbcsr_data_release 444 7.6 0.699 0.792 0.699 0.792 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.315 0.780 dbcsr_merge_all 18 6.5 0.129 0.138 0.717 0.764 dbcsr_destroy 111 5.9 0.007 0.054 0.592 0.687 dbcsr_redistribute 9 5.0 0.337 0.384 0.549 0.573 dbcsr_checksum 6 5.0 0.408 0.493 0.498 0.498 dbcsr_data_copy_aa2 18 7.5 0.438 0.476 0.438 0.476 make_images_data 18 8.0 0.001 0.001 0.395 0.456 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.279999999999987, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=56.371, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=14.4, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=14.134, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=3.427, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.397, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.423, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.5680000000000014, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.337, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.893, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=13.876, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.129, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.699, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.493, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.093, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.050 0.050 139.016 139.016 qs_mol_dyn_low 1 2.0 0.005 0.005 137.025 137.025 velocity_verlet 5 3.0 0.005 0.005 110.793 110.793 qmmm_el_coupling 6 3.8 0.000 0.000 68.254 68.254 qmmm_elec_with_gaussian 6 4.8 0.201 0.201 68.248 68.248 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 67.236 67.236 qmmm_elec_gaussian_low_G 6 6.8 65.745 65.745 65.745 65.745 qs_forces 6 3.8 0.001 0.001 55.446 55.446 qs_energies 6 4.8 0.000 0.000 49.135 49.135 scf_env_do_scf 6 5.8 0.001 0.001 45.301 45.301 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 38.097 38.097 rebuild_ks_matrix 45 8.4 0.000 0.000 37.534 37.534 qs_ks_build_kohn_sham_matrix 45 9.4 0.008 0.008 37.533 37.533 qs_ks_update_qs_env 45 7.8 0.000 0.000 32.006 32.006 pw_transfer 966 12.3 0.075 0.075 23.519 23.519 fft_wrap_pw1pw2 801 13.6 0.009 0.009 23.201 23.201 fft_wrap_pw1pw2_150 507 15.2 2.387 2.387 22.682 22.682 qs_vxc_create 45 10.4 0.001 0.001 18.901 18.901 xc_vxc_pw_create 45 11.4 1.006 1.006 18.900 18.900 xc_pw_derive 270 13.4 0.003 0.003 12.634 12.634 pw_scatter_s 429 15.8 10.987 10.987 10.987 10.987 xc_rho_set_and_dset_create 45 12.4 1.180 1.180 10.786 10.786 qs_rho_update_rho 45 7.9 0.000 0.000 9.822 9.822 calculate_rho_elec 45 8.9 0.894 0.894 9.821 9.821 fft3d_s 802 15.6 8.501 8.501 8.512 8.512 qmmm_forces 6 3.8 0.002 0.002 8.265 8.265 pw_integral_ab 2539 7.4 7.836 7.836 7.836 7.836 qmmm_forces_with_gaussian 6 4.8 0.221 0.221 7.738 7.738 init_scf_loop 6 6.8 0.000 0.000 7.197 7.197 xc_pw_divergence 45 12.4 0.002 0.002 7.026 7.026 qs_ks_ddapc 45 10.4 0.001 0.001 6.850 6.850 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.550 6.550 pw_poisson_solve 51 9.9 2.469 2.469 5.562 5.562 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.540 5.540 qmmm_forces_gaussian_low_G 6 6.8 5.461 5.461 5.461 5.461 density_rs2pw 45 9.9 0.003 0.003 4.624 4.624 sum_up_and_integrate 45 10.4 0.257 0.257 4.378 4.378 grid_collocate_task_list 45 9.9 4.303 4.303 4.303 4.303 cp_ddapc_apply_CD 45 11.4 0.007 0.007 4.279 4.279 fist_calc_energy_force 6 3.8 0.002 0.002 4.260 4.260 integrate_v_rspace 45 11.4 0.014 0.014 4.122 4.122 force_nonbond 6 4.8 2.986 2.986 2.986 2.986 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.031 0.036 75.718 75.719 qs_mol_dyn_low 1 2.0 0.005 0.005 74.119 74.217 qs_forces 6 3.8 0.001 0.001 53.897 53.897 qs_energies 6 4.8 0.001 0.001 51.367 51.367 scf_env_do_scf 6 5.8 0.000 0.001 50.061 50.061 scf_env_do_scf_inner_loop 113 6.2 0.003 0.010 48.082 48.084 rebuild_ks_matrix 119 8.1 0.000 0.001 34.791 34.808 qs_ks_build_kohn_sham_matrix 119 9.1 0.020 0.022 34.790 34.808 qs_ks_update_qs_env 119 7.3 0.001 0.001 32.721 32.738 velocity_verlet 5 3.0 0.002 0.003 31.122 31.127 pw_transfer 2446 12.3 0.211 0.227 20.450 20.792 fft_wrap_pw1pw2 2059 13.4 0.026 0.030 19.856 20.234 fft_wrap_pw1pw2_150 1321 14.9 1.787 1.976 19.189 19.537 qs_vxc_create 119 10.1 0.003 0.003 17.042 17.052 xc_vxc_pw_create 119 11.1 0.200 0.260 17.039 17.048 fft3d_ps 2059 15.4 8.903 9.752 14.936 15.543 qs_rho_update_rho 119 7.3 0.001 0.001 14.020 14.021 calculate_rho_elec 119 8.3 0.089 0.098 14.019 14.020 sum_up_and_integrate 119 10.1 0.075 0.098 13.138 13.190 integrate_v_rspace 119 11.1 0.004 0.005 13.063 13.120 xc_pw_derive 714 13.1 0.011 0.013 12.485 12.899 qmmm_forces 6 3.8 0.002 0.003 11.115 11.116 qmmm_forces_with_gaussian 6 4.8 0.262 0.338 10.732 10.875 rs_pw_transfer 988 11.5 0.014 0.015 9.094 9.603 xc_pw_divergence 119 12.1 0.006 0.007 8.329 8.611 xc_rho_set_and_dset_create 119 12.1 0.544 0.662 8.180 8.526 density_rs2pw 119 9.3 0.009 0.010 7.866 8.289 qmmm_el_coupling 6 3.8 0.000 0.000 7.848 7.967 qmmm_elec_with_gaussian 6 4.8 0.241 0.334 7.846 7.965 potential_pw2rs 119 12.1 0.010 0.011 6.961 6.986 grid_collocate_task_list 119 9.3 5.886 6.281 5.886 6.281 grid_integrate_task_list 119 12.1 5.720 5.976 5.720 5.976 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.775 5.863 mp_alltoall_z22v 2059 17.4 3.933 5.358 3.933 5.358 qmmm_forces_gaussian_low_G 6 6.8 4.716 4.807 4.716 4.807 mp_waitany 4028 12.8 3.138 3.920 3.138 3.920 yz_to_x 964 16.0 0.764 0.924 2.868 3.823 pw_restrict_s3 18 5.8 2.133 2.185 3.657 3.788 rs_pw_transfer_PW2RS_150 125 13.9 1.720 1.980 3.711 3.769 qs_scf_new_mos 113 7.2 0.001 0.001 3.737 3.746 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.737 3.745 rs_pw_transfer_RS2PW_150 125 11.2 1.408 1.648 3.225 3.726 ot_scf_mini 113 9.2 0.002 0.002 3.573 3.582 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.408 3.470 dbcsr_multiply_generic 2588 12.3 0.100 0.117 3.387 3.448 x_to_yz 1095 16.8 1.296 1.479 3.125 3.391 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.024 3.139 pw_prolongate_s3 18 6.8 1.728 1.784 3.024 3.139 qmmm_elec_gaussian_low_G 6 6.8 2.479 2.540 2.479 2.540 qs_ks_ddapc 119 10.1 0.003 0.003 2.333 2.476 pw_integral_ab 2761 7.7 1.726 1.865 2.177 2.336 mp_sum_dm3 33 5.7 2.146 2.296 2.146 2.296 ot_mini 113 10.2 0.001 0.001 2.262 2.273 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.079 2.080 mp_waitall_1 188862 16.2 1.866 2.051 1.866 2.051 init_scf_loop 6 6.8 0.000 0.000 1.975 1.975 mp_sum_d 5820 12.2 1.229 1.844 1.229 1.844 qs_ot_get_derivative 113 11.2 0.001 0.001 1.782 1.791 pw_gather_p 964 15.0 1.599 1.770 1.599 1.770 pw_scatter_p 1095 15.8 1.460 1.523 1.460 1.523 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=36.18299999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=65.745, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.987, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.501, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=7.836, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.461, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.303, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=42.355000000000004, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.479, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.726, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.716, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.886, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.72, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=8.903, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.933, yerr=0.0 Summary: Performance test took 63 minutes. Status: OK Removing intermediate container 8c4f43dcf59a ---> 2e72d3eb482d Step 41/41 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 3f7d7765d7e6 Removing intermediate container 3f7d7765d7e6 ---> f54f0a906abd [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built f54f0a906abd Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-b51:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-03-14 20:20:53+00:00