StartDate: 2022-06-08 11:21:40+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: 2b3941e753317e96f8b6a78b2e81498f35d01b90 CommitTime: 2022-06-08 12:51:03 +0200 CommitAuthor: Holly Judge CommitSubject: Add get_cell and get_qmmm_cell to libcp2k Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=2b3941e753317e96f8b6a78b2e81498f35d01b90 Sending build context to Docker daemon 363.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 405f018f9d1d: Already exists Digest: sha256:b6b83d3c331794420340093eb706a6f152d9c1fa51b262d9bf34594887c2c7ac Status: Downloaded newer image for ubuntu:22.04 ---> 27941809078c Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 92ee757f28a3 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> a398cc4ae5b3 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> e20712e9c254 Step 5/42 : RUN mkdir scripts ---> Using cache ---> c7b9413ca6be Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 5f4bcd2de9f5 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> c39d97839810 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> d0b21d05b338 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 6e616c7670ff Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 5ba44cd61a38 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 1aa896c19a24 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> c7ccbf5e1b85 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 9cb6a1bd2cd3 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 1eed70bdd06a Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> ac04ff4ae473 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> cd0e2369620a Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 1b4ef27dc823 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 8c2ce1cbdb23 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> c08a0bc6f0ef Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 377bc99f74ae Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 415f340401e7 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 46d9c769ebc6 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> f0e60294b67a Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> d15ac218a434 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 90d72dc38daa Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> eab42cb47970 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 91fe84927564 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 57dd88e1422f Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> b506dc2b218b Step 30/42 : COPY ./Makefile . ---> Using cache ---> ddbbaa780d9d Step 31/42 : COPY ./src ./src ---> 1a54d50a583f Step 32/42 : COPY ./exts ./exts ---> 726efd4b7b60 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> d2bacc09eb85 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in a0e93d3ce98b './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container a0e93d3ce98b ---> 27efead55503 Step 35/42 : COPY ./data ./data ---> 6022a0ac4bb0 Step 36/42 : COPY ./tests ./tests ---> 0253116309e5 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 6d5531adb44d Step 38/42 : COPY ./benchmarks ./benchmarks ---> 3016bb92fc0e Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 04f1dde02432 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in a0841c9ff5d8 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.027 0.027 199.663 199.663 qs_mol_dyn_low 1 2.0 0.003 0.003 199.029 199.029 qs_forces 11 3.9 0.001 0.001 198.975 198.975 qs_energies 11 4.9 0.001 0.001 192.291 192.291 scf_env_do_scf 11 5.9 0.001 0.001 179.517 179.517 velocity_verlet 10 3.0 0.002 0.002 136.791 136.791 init_scf_loop 11 6.9 0.000 0.000 98.305 98.305 prepare_preconditioner 11 7.9 0.000 0.000 95.486 95.486 make_preconditioner 11 8.9 0.000 0.000 95.486 95.486 make_full_inverse_cholesky 11 9.9 0.000 0.000 94.246 94.246 scf_env_do_scf_inner_loop 108 6.5 0.012 0.012 81.079 81.079 cp_fm_cholesky_invert 11 10.9 65.929 65.929 65.929 65.929 qs_scf_new_mos 108 7.5 0.001 0.001 42.962 42.962 qs_scf_loop_do_ot 108 8.5 0.001 0.001 42.961 42.961 ot_scf_mini 108 9.5 0.003 0.003 41.356 41.356 cp_fm_cholesky_decompose 22 10.9 24.303 24.303 24.303 24.303 qs_ot_get_p 119 10.4 0.001 0.001 22.708 22.708 rebuild_ks_matrix 119 8.3 0.001 0.001 22.575 22.575 qs_ks_build_kohn_sham_matrix 119 9.3 0.013 0.013 22.575 22.575 dbcsr_multiply_generic 2286 12.5 0.175 0.175 21.688 21.688 qs_rho_update_rho 119 7.7 0.001 0.001 21.617 21.617 calculate_rho_elec 119 8.7 0.987 0.987 21.616 21.616 qs_ot_p2m_diag 50 11.0 0.158 0.158 21.215 21.215 cp_dbcsr_syevd 50 12.0 0.003 0.003 20.775 20.775 qs_ks_update_qs_env 119 7.6 0.001 0.001 20.681 20.681 cp_fm_diag_elpa 50 13.0 0.000 0.000 19.521 19.521 cp_fm_diag_elpa_base 50 14.0 19.455 19.455 19.520 19.520 grid_collocate_task_list 119 9.7 16.938 16.938 16.938 16.938 ot_mini 108 10.5 0.001 0.001 14.842 14.842 sum_up_and_integrate 119 10.3 0.226 0.226 14.162 14.162 integrate_v_rspace 119 11.3 0.103 0.103 13.936 13.936 grid_integrate_task_list 119 12.3 11.835 11.835 11.835 11.835 make_m2s 4572 13.5 0.050 0.050 11.540 11.540 qs_ot_get_derivative 108 11.5 0.001 0.001 9.042 9.042 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.645 6.645 pw_transfer 1439 11.6 0.069 0.069 6.402 6.402 fft_wrap_pw1pw2 1201 12.6 0.007 0.007 6.154 6.154 multiply_cannon 2286 13.5 0.189 0.189 6.043 6.043 dbcsr_make_dense_low 5837 15.5 0.070 0.070 6.008 6.008 make_dense_data 5837 16.5 5.372 5.372 5.922 5.922 make_images 4572 14.5 2.198 2.198 5.869 5.869 ot_diis_step 108 11.5 0.004 0.004 5.796 5.796 multiply_cannon_loop 2286 14.5 0.134 0.134 5.530 5.530 multiply_cannon_multrec 2286 15.5 5.335 5.335 5.395 5.395 fft_wrap_pw1pw2_140 487 13.2 0.511 0.511 5.289 5.289 dbcsr_make_images_dense 3978 14.8 0.019 0.019 5.262 5.262 init_scf_run 11 5.9 0.002 0.002 5.238 5.238 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.236 5.236 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.228 5.228 apply_single 119 13.6 0.000 0.000 5.227 5.227 wfi_extrapolate 11 7.9 0.001 0.001 4.689 4.689 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.022 54.439 54.450 qs_mol_dyn_low 1 2.0 0.004 0.006 54.329 54.333 qs_forces 11 3.9 0.001 0.002 54.283 54.283 qs_energies 11 4.9 0.001 0.001 50.484 50.486 scf_env_do_scf 11 5.9 0.001 0.002 46.272 46.272 scf_env_do_scf_inner_loop 108 6.5 0.003 0.021 42.647 42.647 velocity_verlet 10 3.0 0.002 0.003 33.253 33.255 rebuild_ks_matrix 119 8.3 0.001 0.001 21.076 21.160 qs_ks_build_kohn_sham_matrix 119 9.3 0.016 0.024 21.076 21.159 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.721 18.797 sum_up_and_integrate 119 10.3 0.022 0.026 16.272 16.332 integrate_v_rspace 119 11.3 0.004 0.005 16.250 16.315 qs_rho_update_rho 119 7.7 0.001 0.001 16.126 16.137 calculate_rho_elec 119 8.7 0.032 0.034 16.125 16.137 dbcsr_multiply_generic 2286 12.5 0.079 0.094 14.113 14.577 grid_collocate_task_list 119 9.7 10.256 12.367 10.256 12.367 grid_integrate_task_list 119 12.3 9.273 12.308 9.273 12.308 qs_scf_new_mos 108 7.5 0.001 0.001 11.282 11.378 qs_scf_loop_do_ot 108 8.5 0.001 0.001 11.281 11.378 ot_scf_mini 108 9.5 0.002 0.003 10.577 10.672 multiply_cannon 2286 13.5 0.148 0.166 10.098 10.476 multiply_cannon_loop 2286 14.5 0.098 0.130 9.453 9.838 mp_waitall_1 169478 16.3 8.316 8.942 8.316 8.942 rs_pw_transfer 974 11.9 0.012 0.020 6.515 7.061 ot_mini 108 10.5 0.001 0.001 6.230 6.332 multiply_cannon_metrocomm3 18288 15.5 0.040 0.055 5.251 5.979 density_rs2pw 119 9.7 0.005 0.007 5.410 5.929 mp_alltoall_d11v 2130 13.8 3.423 3.930 3.423 3.930 pw_transfer 1439 11.6 0.092 0.104 3.759 3.825 potential_pw2rs 119 12.3 0.007 0.009 3.785 3.814 multiply_cannon_multrec 18288 15.5 3.309 3.672 3.320 3.686 rs_gather_matrices 119 12.3 0.101 0.117 3.149 3.663 fft_wrap_pw1pw2 1201 12.6 0.010 0.012 3.591 3.649 mp_waitany 9880 13.7 3.015 3.640 3.015 3.640 init_scf_loop 11 6.9 0.000 0.000 3.610 3.611 qs_ot_get_derivative 108 11.5 0.001 0.001 3.169 3.265 rs_pw_transfer_RS2PW_140 130 11.5 0.305 0.381 2.691 3.231 fft_wrap_pw1pw2_140 487 13.2 0.304 0.358 3.021 3.137 ot_diis_step 108 11.5 0.004 0.005 3.026 3.027 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.835 2.961 apply_single 119 13.6 0.000 0.000 2.835 2.961 init_scf_run 11 5.9 0.000 0.005 2.884 2.885 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.884 2.884 make_m2s 4572 13.5 0.049 0.061 2.667 2.749 fft3d_ps 1201 14.6 1.320 1.463 2.613 2.718 wfi_extrapolate 11 7.9 0.001 0.001 2.631 2.631 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.523 2.534 make_images 4572 14.5 0.127 0.152 2.301 2.375 rs_pw_transfer_PW2RS_140 130 13.9 0.636 0.745 1.616 1.674 mp_sum_d 4129 12.0 1.220 1.538 1.220 1.538 qs_ot_get_p 119 10.4 0.001 0.001 1.370 1.478 mp_sum_l 11218 13.2 0.855 1.432 0.855 1.432 make_images_data 4572 15.5 0.038 0.049 1.223 1.381 prepare_preconditioner 11 7.9 0.000 0.000 1.314 1.345 make_preconditioner 11 8.9 0.000 0.000 1.314 1.345 mp_alltoall_z22v 1201 16.6 1.015 1.266 1.015 1.266 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 1.006 1.250 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.195 1.212 multiply_cannon_metrocomm1 18288 15.5 0.020 0.030 0.581 1.194 hybrid_alltoall_any 4725 16.4 0.074 0.315 1.067 1.189 qs_energies_init_hamiltonians 11 5.9 0.000 0.003 1.099 1.103 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=55.867999999999995, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=65.929, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=24.303, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=19.455, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=16.938, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.835, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.335, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=19.861999999999995, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=10.256, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=9.273, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.309, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.316, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.423, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.038 0.038 224.591 224.591 qs_mol_dyn_low 1 2.0 0.002 0.002 223.906 223.906 qs_forces 11 3.9 0.001 0.001 223.866 223.866 qs_energies 11 4.9 0.001 0.001 215.534 215.534 scf_env_do_scf 11 5.9 0.001 0.001 199.811 199.811 velocity_verlet 10 3.0 0.002 0.002 156.817 156.817 init_scf_loop 11 6.9 0.000 0.000 101.395 101.395 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 98.273 98.273 prepare_preconditioner 11 7.9 0.000 0.000 96.843 96.843 make_preconditioner 11 8.9 0.000 0.000 96.843 96.843 make_full_inverse_cholesky 11 9.9 0.000 0.000 95.714 95.714 cp_fm_cholesky_invert 11 10.9 67.020 67.020 67.020 67.020 qs_scf_new_mos 96 7.5 0.001 0.001 38.501 38.501 qs_scf_loop_do_ot 96 8.5 0.001 0.001 38.501 38.501 ot_scf_mini 96 9.5 0.002 0.002 37.092 37.092 rebuild_ks_matrix 107 8.3 0.001 0.001 36.457 36.457 qs_ks_build_kohn_sham_matrix 107 9.3 0.012 0.012 36.457 36.457 qs_rho_update_rho 107 7.7 0.001 0.001 34.609 34.609 calculate_rho_elec 107 8.7 0.881 0.881 34.609 34.609 qs_ks_update_qs_env 107 7.6 0.001 0.001 32.725 32.725 grid_collocate_task_list 107 9.7 30.406 30.406 30.406 30.406 sum_up_and_integrate 107 10.3 0.198 0.198 29.021 29.021 integrate_v_rspace 107 11.3 0.092 0.092 28.824 28.824 grid_integrate_task_list 107 12.3 26.958 26.958 26.958 26.958 cp_fm_cholesky_decompose 22 10.9 24.923 24.923 24.923 24.923 qs_ot_get_p 107 10.4 0.001 0.001 20.429 20.429 qs_ot_p2m_diag 44 11.0 0.152 0.152 19.277 19.277 cp_dbcsr_syevd 44 12.0 0.002 0.002 18.893 18.893 dbcsr_multiply_generic 1966 12.4 0.157 0.157 18.862 18.862 cp_fm_diag_elpa 44 13.0 0.000 0.000 17.652 17.652 cp_fm_diag_elpa_base 44 14.0 17.598 17.598 17.652 17.652 ot_mini 96 10.5 0.001 0.001 13.317 13.317 make_m2s 3932 13.4 0.042 0.042 9.878 9.878 qs_ot_get_derivative 96 11.5 0.001 0.001 8.529 8.529 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.633 7.633 init_scf_run 11 5.9 0.002 0.002 7.046 7.046 scf_env_initial_rho_setup 11 6.9 0.001 0.001 7.044 7.044 wfi_extrapolate 11 7.9 0.001 0.001 6.281 6.281 pw_transfer 1295 11.6 0.057 0.057 5.765 5.765 fft_wrap_pw1pw2 1081 12.6 0.006 0.006 5.540 5.540 multiply_cannon 1966 13.4 0.160 0.160 5.365 5.365 dbcsr_make_dense_low 4961 15.5 0.059 0.059 5.205 5.205 make_dense_data 4961 16.5 4.590 4.590 5.135 5.135 make_images 3932 14.4 1.973 1.973 4.958 4.958 multiply_cannon_loop 1966 14.4 0.090 0.090 4.930 4.930 multiply_cannon_multrec 1966 15.4 4.785 4.785 4.839 4.839 ot_diis_step 96 11.5 0.003 0.003 4.785 4.785 fft_wrap_pw1pw2_140 439 13.2 0.403 0.403 4.762 4.762 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.626 4.626 dbcsr_make_images_dense 3386 14.7 0.016 0.016 4.586 4.586 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.029 87.152 87.163 qs_mol_dyn_low 1 2.0 0.003 0.004 87.030 87.042 qs_forces 11 3.9 0.001 0.002 86.990 86.990 qs_energies 11 4.9 0.001 0.001 81.067 81.070 scf_env_do_scf 11 5.9 0.000 0.002 75.109 75.109 scf_env_do_scf_inner_loop 96 6.5 0.002 0.017 69.609 69.609 velocity_verlet 10 3.0 0.001 0.003 51.764 51.765 rebuild_ks_matrix 107 8.3 0.000 0.001 38.634 38.726 qs_ks_build_kohn_sham_matrix 107 9.3 0.013 0.019 38.633 38.725 sum_up_and_integrate 107 10.3 0.016 0.019 34.100 34.479 integrate_v_rspace 107 11.3 0.004 0.005 34.084 34.465 qs_ks_update_qs_env 107 7.6 0.001 0.001 34.093 34.173 qs_rho_update_rho 107 7.7 0.001 0.001 32.874 32.882 calculate_rho_elec 107 8.7 0.027 0.028 32.874 32.882 grid_integrate_task_list 107 12.3 23.698 30.872 23.698 30.872 grid_collocate_task_list 107 9.7 22.975 29.616 22.975 29.616 dbcsr_multiply_generic 1966 12.4 0.064 0.076 13.007 18.589 rs_pw_transfer 878 11.9 0.010 0.013 10.727 11.582 density_rs2pw 107 9.7 0.004 0.006 9.519 10.400 qs_scf_new_mos 96 7.5 0.001 0.001 9.854 9.958 qs_scf_loop_do_ot 96 8.5 0.001 0.001 9.853 9.957 multiply_cannon 1966 13.4 0.120 0.133 9.321 9.575 ot_scf_mini 96 9.5 0.002 0.002 9.236 9.330 multiply_cannon_loop 1966 14.4 0.083 0.118 8.771 9.096 mp_waitall_1 146670 16.2 7.957 8.669 7.957 8.669 mp_waitany 8968 13.7 7.570 8.589 7.570 8.589 mp_alltoall_d11v 1998 13.7 7.173 8.477 7.173 8.477 rs_gather_matrices 107 12.3 0.079 0.095 6.939 8.240 rs_pw_transfer_RS2PW_140 118 11.5 0.256 0.296 7.202 8.050 mp_sum_l 9666 13.1 1.074 6.783 1.074 6.783 multiply_cannon_metrocomm3 15728 15.4 0.034 0.049 5.074 5.694 ot_mini 96 10.5 0.001 0.001 5.400 5.502 init_scf_loop 11 6.9 0.000 0.001 5.486 5.486 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.696 4.707 init_scf_run 11 5.9 0.000 0.004 4.694 4.694 scf_env_initial_rho_setup 11 6.9 0.000 0.003 4.693 4.694 wfi_extrapolate 11 7.9 0.001 0.001 4.268 4.268 multiply_cannon_multrec 15728 15.4 2.868 3.482 2.877 3.493 potential_pw2rs 107 12.3 0.006 0.008 3.408 3.450 pw_transfer 1295 11.6 0.078 0.087 3.199 3.250 fft_wrap_pw1pw2 1081 12.6 0.008 0.011 3.060 3.100 qs_ot_get_derivative 96 11.5 0.001 0.001 2.803 2.895 fft_wrap_pw1pw2_140 439 13.2 0.234 0.290 2.545 2.628 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.547 2.622 apply_single 107 13.6 0.000 0.000 2.547 2.622 ot_diis_step 96 11.5 0.003 0.004 2.576 2.576 fft3d_ps 1081 14.6 1.065 1.222 2.252 2.374 make_m2s 3932 13.4 0.040 0.051 2.212 2.295 make_images 3932 14.4 0.105 0.128 1.908 1.988 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=57.686000000000035, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=67.02, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=30.406, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.958, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=24.923, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=17.598, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=17.778999999999996, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.975, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=23.698, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=7.173, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.957, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=7.57, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.197 0.197 164.197 164.197 qs_energies 1 2.0 0.000 0.000 163.306 163.306 scf_env_do_scf 1 3.0 0.000 0.000 161.581 161.581 qs_ks_update_qs_env 8 5.0 0.000 0.000 122.166 122.166 rebuild_ks_matrix 7 6.0 0.000 0.000 122.106 122.106 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 122.106 122.106 hfx_ks_matrix 7 8.0 0.000 0.000 103.485 103.485 integrate_four_center 7 9.0 1.405 1.405 103.463 103.463 init_scf_loop 1 4.0 0.000 0.000 91.488 91.488 integrate_four_center_main 7 10.0 0.794 0.794 89.831 89.831 integrate_four_center_bin 454 11.0 89.038 89.038 89.038 89.038 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 70.080 70.080 prepare_preconditioner 1 5.0 0.000 0.000 33.858 33.858 make_preconditioner 1 6.0 0.000 0.000 33.858 33.858 arnoldi_normal_ev 11 9.3 0.002 0.002 20.727 20.727 estimate_cond_num 1 7.0 0.000 0.000 20.682 20.682 build_subspace 28 9.5 0.012 0.012 20.555 20.555 integrate_four_center_load 7 10.0 0.000 0.000 11.954 11.954 hfx_load_balance 1 11.0 0.001 0.001 11.954 11.954 dbcsr_sym_m_v_mult 562 10.0 0.019 0.019 11.212 11.212 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 10.809 10.809 admm_fit_mo_coeffs 7 9.0 0.000 0.000 9.474 9.474 cp_fm_cholesky_invert 2 9.5 8.879 8.879 8.879 8.879 make_full_inverse_cholesky 1 7.0 0.000 0.000 8.531 8.531 DGKS_ortho_d 673 10.6 7.624 7.624 7.627 7.627 dbcsr_copy 1318 10.8 1.726 1.726 6.019 6.019 hfx_load_balance_bin 1 12.0 5.978 5.978 5.978 5.978 hfx_load_balance_count 1 12.0 5.958 5.958 5.958 5.958 Gram_Schmidt_ortho_d 673 10.6 5.877 5.877 5.877 5.877 purify_mo_diag 7 10.0 0.001 0.001 5.034 5.034 dbcsr_create_new 3176 12.1 2.726 2.726 4.501 4.501 make_full_single_inverse 1 7.0 0.000 0.000 4.468 4.468 fit_mo_coeffs 7 10.0 0.000 0.000 4.439 4.439 cp_fm_syevd 7 11.0 0.000 0.000 4.433 4.433 cp_fm_syevd_base 7 12.0 4.432 4.432 4.432 4.432 arnoldi_generalized_ev 1 8.0 0.000 0.000 4.377 4.377 gev_build_subspace 4 9.0 0.005 0.005 4.138 4.138 qs_scf_new_mos 7 5.0 0.000 0.000 4.070 4.070 qs_scf_loop_do_ot 7 6.0 0.000 0.000 4.070 4.070 ot_scf_mini 7 7.0 0.000 0.000 3.972 3.972 qs_vxc_create 14 8.0 0.000 0.000 3.847 3.847 xc_vxc_pw_create 14 9.0 0.127 0.127 3.847 3.847 dbcsr_set 2825 11.8 0.004 0.004 3.797 3.797 dbcsr_zero 2837 12.8 3.793 3.793 3.793 3.793 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.214 0.241 139.777 139.792 qs_energies 1 2.0 0.000 0.000 139.433 139.441 scf_env_do_scf 1 3.0 0.000 0.001 138.990 138.990 qs_ks_update_qs_env 8 5.0 0.000 0.000 136.681 136.681 rebuild_ks_matrix 7 6.0 0.000 0.000 136.672 136.672 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 136.672 136.672 hfx_ks_matrix 7 8.0 0.000 0.000 130.151 130.153 integrate_four_center 7 9.0 0.055 0.387 130.142 130.143 integrate_four_center_main 7 10.0 0.003 0.004 83.254 116.880 integrate_four_center_bin 448 11.0 83.251 116.877 83.251 116.877 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 78.536 78.536 init_scf_loop 1 4.0 0.000 0.000 60.452 60.453 mp_sync 70 11.3 33.648 36.069 33.648 36.069 integrate_four_center_load 7 10.0 0.000 0.000 12.489 12.493 hfx_load_balance 1 11.0 0.001 0.001 12.489 12.493 mp_sum_l 1135 8.3 6.172 6.465 6.172 6.465 hfx_load_balance_dist 1 12.0 0.000 0.000 6.016 6.307 hfx_load_balance_count 1 12.0 3.212 6.204 3.212 6.204 hfx_load_balance_bin 1 12.0 3.172 6.195 3.172 6.195 qs_vxc_create 14 8.0 0.001 0.001 3.040 3.040 xc_vxc_pw_create 14 9.0 0.008 0.009 3.039 3.039 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=46.72000000000001, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=89.038, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=8.879, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=7.624, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.978, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.958, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=10.321999999999974, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=83.251, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.172, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.212, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=6.172, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=33.648, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 159.166 159.166 qs_energies 1 2.0 0.000 0.000 158.771 158.771 mp2_main 1 3.0 0.000 0.000 133.298 133.298 mp2_gpw_main 1 4.0 0.000 0.000 132.334 132.334 rpa_ri_compute_en 1 5.0 0.000 0.000 124.870 124.870 rpa_num_int 1 6.0 0.001 0.001 124.864 124.864 compute_mat_P_omega 1 7.0 0.003 0.003 72.528 72.528 compute_mat_P_omega_contract 10 8.0 8.533 8.533 72.325 72.325 dbt_total 2336 9.6 0.012 0.012 60.240 60.240 dbt_contract 787 11.0 0.033 0.033 53.322 53.322 dbt_tas_total 1149 12.2 0.185 0.185 51.986 51.986 dbt_tas_multiply 807 12.1 0.002 0.002 50.563 50.563 dbt_tas_dbm 807 14.1 0.003 0.003 43.637 43.637 dbm_multiply 807 16.1 43.627 43.627 43.627 43.627 GW_matrix_operations 10 7.0 0.004 0.004 31.940 31.940 dbt_tas_mm_1N 524 15.1 0.002 0.002 31.214 31.214 cp_fm_cholesky_invert 10 8.0 31.129 31.129 31.129 31.129 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 25.967 25.967 scf_env_do_scf 1 3.0 0.000 0.000 25.306 25.306 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 25.306 25.306 qs_scf_new_mos 17 5.0 0.000 0.000 23.648 23.648 eigensolver 18 5.9 0.001 0.001 21.566 21.566 compute_mat_P_omega_calc_M_occ 250 9.0 8.546 8.546 20.692 20.692 cp_fm_cholesky_decompose 14 8.1 14.988 14.988 14.988 14.988 cp_fm_diag_elpa 18 6.9 0.000 0.000 14.769 14.769 cp_fm_diag_elpa_base 18 7.9 14.702 14.702 14.769 14.769 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 13.412 13.412 RPA_postprocessing_nokp 10 8.0 0.001 0.001 12.414 12.414 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.601 11.601 dbt_tas_mm_2 251 15.0 0.001 0.001 10.388 10.388 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 7.457 7.457 cp_fm_cholesky_restore 51 7.0 6.724 6.724 6.724 6.724 compute_QP_energies 1 7.0 0.000 0.000 5.993 5.993 compute_self_energy_cubic_gw 1 8.0 0.050 0.050 5.992 5.992 get_2c_integrals 1 6.0 0.000 0.000 5.846 5.846 dbt_copy 1103 10.7 0.069 0.069 5.477 5.477 contract_cubic_gw 21 9.0 0.000 0.000 4.853 4.853 dbt_tas_reserve_blocks_index 3261 14.3 0.154 0.154 3.360 3.360 dbm_reserve_blocks 3628 15.3 3.280 3.280 3.280 3.280 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.022 36.186 36.197 qs_energies 1 2.0 0.001 0.001 36.086 36.087 mp2_main 1 3.0 0.000 0.001 34.894 34.895 mp2_gpw_main 1 4.0 0.000 0.001 34.849 34.850 rpa_ri_compute_en 1 5.0 0.000 0.000 33.382 33.383 rpa_num_int 1 6.0 0.001 0.009 33.381 33.382 dbt_total 2336 9.6 0.012 0.014 29.783 29.811 compute_mat_P_omega 1 7.0 0.001 0.005 28.661 28.680 compute_mat_P_omega_contract 10 8.0 0.434 0.486 28.499 28.505 dbt_contract 787 11.0 0.028 0.031 22.378 22.384 dbt_tas_total 1149 12.2 0.054 0.069 19.986 19.987 dbt_tas_multiply 807 12.1 0.002 0.003 19.925 19.927 dbt_tas_dbm 807 14.1 0.003 0.004 14.757 14.809 dbm_multiply 807 16.1 11.275 12.206 11.275 12.206 compute_mat_P_omega_calc_M_occ 250 9.0 0.417 0.475 8.477 8.477 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.449 8.449 mp_sync 8706 11.6 5.763 7.286 5.763 7.286 dbt_tas_mm_2 251 15.0 0.001 0.002 6.954 6.968 dbt_copy 1111 10.7 0.012 0.014 6.349 6.871 dbt_reshape 1098 11.7 2.350 3.031 6.049 6.535 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 6.079 6.080 dbt_tas_mm_1N 524 15.1 0.002 0.002 5.177 5.701 mp_waitall_2 3776 15.3 2.907 3.173 2.907 3.173 dbt_communicate_buffer 1098 12.7 0.059 0.079 2.941 3.106 compute_QP_energies 1 7.0 0.000 0.000 3.026 3.028 compute_self_energy_cubic_gw 1 8.0 0.002 0.003 3.023 3.026 contract_cubic_gw 21 9.0 0.000 0.000 2.367 2.368 dbt_crop 1042 12.0 0.994 1.366 1.550 2.011 dbt_reserve_blocks_index 2849 13.1 0.071 0.090 1.722 1.999 dbt_reserve_blocks_index_array 2791 12.2 0.009 0.011 1.721 1.997 dbt_tas_reserve_blocks_index 3300 14.5 0.126 0.165 1.689 1.962 dbm_reserve_blocks 3696 15.4 1.667 1.936 1.667 1.936 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.463 1.465 dbt_tas_replicate 396 14.1 0.593 0.783 1.253 1.429 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.153 1.160 scf_env_do_scf 1 3.0 0.000 0.000 1.134 1.135 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 1.134 1.134 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.067 1.079 convert_to_new_pgrid 2421 14.1 0.023 0.034 0.878 1.041 dbm_copy 1608 15.1 0.849 1.014 0.849 1.014 cp_gemm 105 8.4 0.000 0.000 1.000 1.009 cp_gemm_cosma 105 9.4 1.000 1.009 1.000 1.009 mp_max_i 1992 9.8 0.761 0.916 0.761 0.916 dbm_add 807 14.1 0.687 0.772 0.687 0.772 GW_matrix_operations 10 7.0 0.001 0.001 0.758 0.764 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=42.894000000000005, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=43.627, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=31.129, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=14.988, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=14.702, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.546, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.28, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=11.807000000000002, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=11.275, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.417, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.667, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.907, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.35, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.763, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.019 0.019 349.150 349.150 qs_forces 1 2.0 0.000 0.000 348.576 348.576 rebuild_ks_matrix 7 6.6 0.000 0.000 326.534 326.534 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 326.534 326.534 hfx_ks_matrix 7 8.6 0.000 0.000 324.427 324.427 dbt_total 4939 11.6 0.031 0.031 265.455 265.455 hfx_ri_update_ks 7 9.6 0.000 0.000 264.857 264.857 hfx_ri_update_ks_Pmat 7 10.6 35.270 35.270 264.854 264.854 qs_energies 1 3.0 0.000 0.000 252.246 252.246 scf_env_do_scf 1 4.0 0.001 0.001 251.794 251.794 dbt_tas_total 2391 14.1 1.090 1.090 234.163 234.163 qs_ks_update_qs_env 8 6.0 0.000 0.000 230.258 230.258 dbt_contract 1473 13.0 0.160 0.160 214.973 214.973 dbt_tas_multiply 1482 14.0 0.004 0.004 202.649 202.649 dbt_tas_dbm 1482 16.0 0.007 0.007 178.789 178.789 dbm_multiply 1482 18.0 178.770 178.770 178.770 178.770 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.004 178.077 178.077 dbt_tas_mm_2 649 17.1 0.005 0.005 147.601 147.601 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 142.697 142.697 init_scf_loop 2 5.0 0.000 0.000 109.095 109.095 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 96.279 96.279 hfx_ri_update_forces 1 7.0 0.000 0.000 59.567 59.567 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 43.792 43.792 dbt_tas_mm_3T 659 17.1 0.002 0.002 23.916 23.916 dbt_tas_reshape 906 14.4 0.021 0.021 23.872 23.872 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 20.887 20.887 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 20.762 20.762 prepare_preconditioner 2 6.0 0.000 0.000 18.975 18.975 dbt_copy 2411 12.3 0.234 0.234 18.923 18.923 make_preconditioner 2 7.0 0.000 0.000 18.555 18.555 cp_fm_syevd 12 10.7 0.000 0.000 18.026 18.026 cp_fm_syevd_base 12 11.7 18.026 18.026 18.026 18.026 make_full_all 2 8.0 0.000 0.000 18.005 18.005 dbt_tas_merge 649 14.1 13.415 13.415 15.010 15.010 dbt_tas_reshape_buffer_fill 906 15.4 13.367 13.367 13.367 13.367 precalc_derivatives 1 8.0 0.007 0.007 12.927 12.927 dbm_reserve_blocks 8383 16.8 12.806 12.806 12.806 12.806 dbt_tas_reserve_blocks_index 7477 16.0 0.367 0.367 12.157 12.157 dbt_crop 2763 14.2 7.673 7.673 12.050 12.050 dbt_reshape 856 13.9 6.044 6.044 10.425 10.425 dbt_reserve_blocks_index 4998 15.2 0.144 0.144 9.356 9.356 dbt_reserve_blocks_index_array 4963 14.3 0.020 0.020 9.292 9.292 hfx_ri_pre_scf_Pmat_2c 1 13.0 0.000 0.000 9.048 9.048 reshape_mm_small 906 15.6 0.145 0.145 7.690 7.690 dbt_tas_reshape_buffer_obtain 906 15.4 6.934 6.934 7.669 7.669 dbt_tas_replicate 906 15.6 5.481 5.481 7.365 7.365 build_3c_derivatives 9 9.0 2.676 2.676 7.213 7.213 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.020 60.327 60.337 qs_forces 1 2.0 0.000 0.000 60.108 60.108 rebuild_ks_matrix 7 6.6 0.000 0.000 59.407 59.408 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.014 59.407 59.408 hfx_ks_matrix 7 8.6 0.000 0.000 58.307 58.316 dbt_total 4939 11.6 0.027 0.031 52.452 52.484 dbt_contract 1473 13.0 0.094 0.104 39.487 39.499 hfx_ri_update_ks 7 9.6 0.000 0.000 39.382 39.383 hfx_ri_update_ks_Pmat 7 10.6 1.463 2.186 39.375 39.375 dbt_tas_total 2391 14.1 0.117 0.142 37.524 37.526 qs_energies 1 3.0 0.000 0.000 36.072 36.072 scf_env_do_scf 1 4.0 0.000 0.001 35.922 35.922 qs_ks_update_qs_env 8 6.0 0.000 0.000 35.387 35.387 dbt_tas_multiply 1482 14.0 0.005 0.005 32.799 32.801 dbt_tas_dbm 1482 16.0 0.005 0.006 24.867 24.892 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 24.022 24.022 dbm_multiply 1482 18.0 17.336 21.532 17.336 21.532 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.005 21.011 21.012 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 20.756 20.757 hfx_ri_update_forces 1 7.0 0.000 0.001 18.923 18.932 mp_sync 17669 13.5 12.243 15.311 12.243 15.311 init_scf_loop 2 5.0 0.000 0.000 15.164 15.164 dbt_tas_mm_2 649 17.1 0.003 0.004 13.991 14.006 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 13.174 13.191 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.003 8.159 8.160 dbt_copy 2429 12.3 0.032 0.035 6.840 7.616 dbt_tas_mm_3T 659 17.1 0.002 0.002 4.446 5.076 dbt_reshape 1257 13.5 2.238 2.844 4.734 5.064 dbt_crop 2763 14.2 3.118 4.167 3.924 5.063 precalc_derivatives 1 8.0 0.001 0.002 4.452 4.452 dbt_tas_mm_3N 163 16.5 0.000 0.001 4.327 4.370 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.001 4.293 4.294 dbt_tas_merge 649 14.1 1.741 2.628 3.087 3.782 mp_waitall_2 5988 16.5 3.141 3.441 3.141 3.441 dbm_reserve_blocks 8417 16.9 2.437 2.928 2.437 2.928 dbt_tas_reserve_blocks_index 7508 16.1 0.260 0.358 2.262 2.746 mp_max_i 3372 12.5 2.021 2.484 2.021 2.484 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 2.473 2.482 dbt_tas_communicate_buffer 1825 16.3 0.061 0.079 2.163 2.419 dbt_tas_replicate 909 15.6 0.609 0.789 2.273 2.378 dbt_reserve_blocks_index 5399 15.2 0.126 0.150 1.933 2.312 dbt_reserve_blocks_index_array 5364 14.2 0.013 0.016 1.930 2.308 dbt_tas_reshape 916 14.4 0.008 0.009 1.978 2.164 mp_alltoall_i 4341 15.3 1.884 2.126 1.884 2.126 build_3c_derivatives 9 9.0 0.244 0.435 2.098 2.101 dbt_communicate_buffer 1257 14.5 0.044 0.077 1.688 1.854 convert_to_new_pgrid 4446 16.0 0.034 0.042 1.300 1.530 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.001 0.002 1.520 1.521 dbm_copy 3043 16.9 1.266 1.497 1.266 1.497 mp_sum_l 38201 15.3 1.074 1.311 1.074 1.311 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=69.82299999999998, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=178.77, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=35.27, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=18.026, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_merge", label="dbt_tas_merge", y=13.415, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=13.367, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=12.806, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=7.673, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=18.848, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=17.336, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.463, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_merge", label="dbt_tas_merge", y=1.741, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.437, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=3.118, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=12.243, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.141, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 235.985 235.985 qs_energies 1 2.0 0.000 0.000 235.794 235.794 mp2_main 1 3.0 0.000 0.000 199.603 199.603 mp2_gpw_main 1 4.0 0.001 0.001 194.930 194.930 mp2_ri_gpw_compute_in 1 5.0 0.394 0.394 147.638 147.638 mp2_ri_gpw_compute_in_loop 1 6.0 0.012 0.012 120.840 120.840 mp2_eri_3c_integrate_gpw 2656 7.0 0.015 0.015 90.777 90.777 integrate_v_rspace 2666 8.0 0.696 0.696 77.096 77.096 grid_integrate_task_list 2666 9.0 74.292 74.292 74.292 74.292 mp2_ri_gpw_compute_en 1 5.0 0.075 0.075 47.269 47.269 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.975 9.975 45.460 45.460 scf_env_do_scf 1 3.0 0.000 0.000 35.257 35.257 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 35.257 35.257 qs_scf_new_mos 10 5.0 0.000 0.000 33.916 33.916 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.392 2.392 27.197 27.197 get_2c_integrals 1 6.0 0.000 0.000 26.335 26.335 eigensolver 11 5.8 0.001 0.001 26.285 26.285 offload_gemm 2080 8.0 24.806 24.806 24.806 24.806 calculate_wavefunction 5312 9.0 16.460 16.460 24.517 24.517 cp_fm_diag_elpa 11 6.8 0.000 0.000 23.854 23.854 cp_fm_diag_elpa_base 11 7.8 23.691 23.691 23.854 23.854 dbcsr_multiply_generic 5322 8.0 0.205 0.205 23.170 23.170 ao_to_mo_and_store_B_mult_1 2656 7.0 0.012 0.012 23.151 23.151 compute_2c_integrals 1 7.0 0.007 0.007 19.290 19.290 compute_2c_integrals_loop_lm 1 8.0 0.011 0.011 19.268 19.268 mp2_eri_2c_integrate_gpw 1 9.0 3.461 3.461 19.257 19.257 pw_transfer 63872 10.6 0.922 0.922 12.422 12.422 multiply_cannon 5322 9.0 0.461 0.461 11.814 11.814 fft_wrap_pw1pw2 53228 11.4 0.114 0.114 11.230 11.230 multiply_cannon_loop 5322 10.0 0.393 0.393 10.367 10.367 qs_diis_b_step 9 6.0 0.001 0.001 9.676 9.676 make_m2s 10644 9.0 0.070 0.070 8.876 8.876 multiply_cannon_multrec 5322 11.0 8.713 8.713 8.755 8.755 make_images 10644 10.0 3.267 3.267 8.545 8.545 cp_fm_symm 18 7.0 8.385 8.385 8.385 8.385 fft_wrap_pw1pw2_20 21271 12.4 0.629 0.629 7.882 7.882 fft3d_s 53229 13.4 7.111 7.111 7.148 7.148 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.537 2.537 6.801 6.801 mp2_ri_gpw_compute_en_ener 2080 7.0 6.280 6.280 6.280 6.280 cp_fm_triangular_invert 2 6.0 6.177 6.177 6.177 6.177 copy_dbcsr_to_fm 2679 8.0 0.031 0.031 5.115 5.115 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.024 46.577 46.591 qs_energies 1 2.0 0.001 0.001 46.499 46.500 mp2_main 1 3.0 0.000 0.001 43.939 43.939 mp2_gpw_main 1 4.0 0.001 0.001 43.809 43.809 mp2_ri_gpw_compute_in 1 5.0 0.045 0.046 20.631 26.708 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 18.655 24.733 mp2_ri_gpw_compute_en 1 5.0 0.082 0.098 23.086 23.601 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 16.351 22.458 integrate_v_rspace 93 8.1 0.115 0.128 16.254 22.235 grid_integrate_task_list 93 9.1 15.837 21.883 15.837 21.883 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.698 0.873 15.919 15.984 mp2_ri_gpw_compute_en_expansio 65 7.0 0.091 0.120 12.251 12.754 offload_gemm 65 8.0 12.160 12.663 12.160 12.663 mp_min_d 2 7.0 6.139 6.758 6.139 6.758 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 6.078 6.593 mp2_ri_gpw_compute_en_comm 17 7.0 0.118 0.231 2.576 3.046 mp_sendrecv_dm3 510 8.0 1.916 2.496 1.916 2.496 scf_env_do_scf 1 3.0 0.000 0.000 2.415 2.416 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.415 2.416 dbcsr_multiply_generic 176 8.0 0.009 0.009 1.976 2.251 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.957 2.235 get_2c_integrals 1 6.0 0.000 0.000 1.903 1.929 compute_2c_integrals 1 7.0 0.003 0.004 1.574 1.591 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 1.148 1.499 mp2_eri_2c_integrate_gpw 1 9.0 0.234 0.369 1.147 1.498 qs_scf_new_mos 10 5.0 0.000 0.000 1.203 1.288 calculate_wavefunction 166 9.0 0.565 0.809 0.981 1.272 multiply_cannon 176 9.0 0.017 0.018 1.170 1.264 multiply_cannon_loop 176 10.0 0.002 0.002 1.107 1.202 eigensolver 11 5.8 0.001 0.001 1.180 1.182 multiply_cannon_multrec 246 11.0 0.961 1.013 0.967 1.019 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.972 0.973 cp_fm_redistribute_end 11 7.8 0.369 0.961 0.385 0.965 pw_transfer 2120 10.5 0.044 0.055 0.875 0.965 make_m2s 352 9.0 0.003 0.003 0.765 0.944 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=78.04800000000003, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=74.292, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=24.806, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=23.691, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=16.46, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.975, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.713, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=8.300999999999995, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.837, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=12.16, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.565, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.698, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.961, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.916, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=6.139, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.078 0.078 236.509 236.509 qs_energies 1 2.0 0.000 0.000 235.114 235.114 scf_env_do_scf 1 3.0 0.000 0.000 224.917 224.917 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 224.917 224.917 qs_scf_new_mos 15 5.0 0.000 0.000 143.480 143.480 eigensolver 15 6.0 0.002 0.002 135.306 135.306 cp_fm_diag_elpa 15 7.0 0.000 0.000 121.173 121.173 cp_fm_diag_elpa_base 15 8.0 118.780 118.780 121.172 121.172 qs_ks_update_qs_env 15 5.0 0.000 0.000 53.285 53.285 rebuild_ks_matrix 15 6.0 0.000 0.000 53.067 53.067 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 53.067 53.067 qs_vxc_create 15 8.0 0.016 0.016 35.723 35.723 calculate_dispersion_nonloc 15 9.0 7.244 7.244 31.017 31.017 qs_rho_update_rho 16 5.0 0.000 0.000 24.555 24.555 calculate_rho_elec 16 6.0 0.218 0.218 24.555 24.555 pw_transfer 1191 10.0 0.067 0.067 24.205 24.205 fft_wrap_pw1pw2 1086 11.0 0.011 0.011 23.977 23.977 grid_collocate_task_list 16 7.0 23.183 23.183 23.183 23.183 fft_wrap_pw1pw2_150 765 12.0 3.325 3.325 17.185 17.185 sum_up_and_integrate 15 8.0 0.046 0.046 15.866 15.866 integrate_v_rspace 15 9.0 0.022 0.022 15.820 15.820 grid_integrate_task_list 15 10.0 15.258 15.258 15.258 15.258 cp_fm_cholesky_restore 45 7.0 11.857 11.857 11.857 11.857 fft3d_s 1087 13.0 11.287 11.287 11.300 11.300 pw_scatter_s 585 13.1 7.176 7.176 7.176 7.176 fft_wrap_pw1pw2_200 197 12.3 0.770 0.770 6.595 6.595 init_scf_run 1 3.0 0.000 0.000 6.495 6.495 dbcsr_complete_redistribute 46 8.3 2.326 2.326 5.755 5.755 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.731 5.731 gspace_mixing 14 5.0 0.172 0.172 4.982 4.982 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.027 70.871 70.882 qs_energies 1 2.0 0.000 0.005 70.571 70.577 scf_env_do_scf 1 3.0 0.000 0.001 65.841 65.841 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 65.841 65.841 qs_ks_update_qs_env 15 5.0 0.000 0.000 28.687 28.701 rebuild_ks_matrix 15 6.0 0.000 0.000 28.639 28.654 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.003 28.639 28.654 qs_rho_update_rho 16 5.0 0.000 0.000 23.901 23.907 calculate_rho_elec 16 6.0 0.007 0.007 23.901 23.906 grid_collocate_task_list 16 7.0 22.357 22.943 22.357 22.943 sum_up_and_integrate 15 8.0 0.008 0.011 16.551 16.608 integrate_v_rspace 15 9.0 0.001 0.001 16.544 16.605 grid_integrate_task_list 15 10.0 15.050 15.602 15.050 15.602 qs_scf_new_mos 15 5.0 0.000 0.000 14.059 14.216 eigensolver 15 6.0 0.001 0.002 12.967 13.004 qs_vxc_create 15 8.0 0.001 0.001 11.746 11.760 calculate_dispersion_nonloc 15 9.0 1.093 2.035 9.619 9.649 cp_fm_diag_elpa 15 7.0 0.000 0.000 9.395 9.400 cp_fm_diag_elpa_base 15 8.0 9.229 9.262 9.391 9.394 pw_transfer 1191 10.0 0.082 0.095 9.014 9.157 fft_wrap_pw1pw2 1086 11.0 0.012 0.013 8.831 8.995 fft3d_ps 1086 13.0 2.705 3.102 6.985 7.302 fft_wrap_pw1pw2_150 765 12.0 0.290 0.337 6.064 6.145 mp_alltoall_z22v 1086 15.0 3.635 4.363 3.635 4.363 cp_fm_cholesky_restore 45 7.0 3.388 3.469 3.388 3.469 yz_to_x 501 13.9 0.252 0.325 2.695 3.085 qs_energies_init_hamiltonians 1 3.0 0.000 0.001 2.873 2.874 fft_wrap_pw1pw2_200 197 12.3 0.204 0.247 2.636 2.736 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.441 2.733 xc_vxc_pw_create 15 9.0 0.016 0.021 2.127 2.158 rs_pw_transfer 158 9.4 0.002 0.002 1.513 1.834 x_to_yz 585 14.1 0.367 0.413 1.559 1.815 density_rs2pw 16 7.0 0.001 0.001 1.408 1.655 build_core_ppnl 1 5.0 1.424 1.622 1.424 1.622 init_scf_run 1 3.0 0.000 0.000 1.604 1.605 vdW_energy 15 10.0 1.510 1.588 1.510 1.588 xc_pw_derive 90 11.0 0.001 0.001 1.445 1.531 scf_env_initial_rho_setup 1 4.0 0.000 0.001 1.510 1.511 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=56.14399999999998, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=118.78, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.183, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.258, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.857, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.287, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=17.212000000000003, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=9.229, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.357, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.05, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.388, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.635, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.079 0.079 246.596 246.596 qs_energies 1 2.0 0.000 0.000 246.455 246.455 ls_scf 1 3.0 0.000 0.000 245.124 245.124 ls_scf_main 1 4.0 0.002 0.002 231.885 231.885 density_matrix_trs4 11 5.0 0.013 0.013 135.566 135.566 ls_scf_dm_to_ks 11 5.0 0.000 0.000 90.964 90.964 matrix_ls_to_qs 11 6.0 0.000 0.000 87.702 87.702 dbcsr_multiply_generic 185 6.1 0.813 0.813 70.432 70.432 arnoldi_extremal 12 6.1 0.000 0.000 57.553 57.553 arnoldi_normal_ev 12 7.1 0.018 0.018 57.553 57.553 build_subspace 23 8.1 0.079 0.079 56.640 56.640 dbcsr_copy_into_existing 11 7.0 48.440 48.440 48.440 48.440 dbcsr_complete_redistribute 23 7.5 31.381 31.381 43.067 43.067 multiply_cannon 185 7.1 0.309 0.309 39.832 39.832 matrix_decluster 11 7.0 0.000 0.000 39.261 39.261 dbcsr_matrix_vector_mult 652 9.0 0.169 0.169 30.197 30.197 multiply_cannon_loop 185 8.1 0.258 0.258 27.199 27.199 make_m2s 370 7.1 0.038 0.038 25.821 25.821 multiply_cannon_multrec 185 9.1 25.260 25.260 25.351 25.351 make_images 370 8.1 10.782 10.782 24.009 24.009 dbcsr_matrix_vector_mult_local 652 10.0 19.220 19.220 19.225 19.225 dbcsr_finalize 646 7.5 0.183 0.183 15.682 15.682 dbcsr_merge_all 597 8.5 2.461 2.461 14.487 14.487 DGKS_ortho_d 702 9.1 13.236 13.236 13.240 13.240 ls_scf_init_scf 1 4.0 0.000 0.000 12.495 12.495 setup_rec_index_2d 370 8.1 12.229 12.229 12.229 12.229 ls_scf_init_matrix_S 1 5.0 0.000 0.000 12.149 12.149 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 11.472 11.472 Gram_Schmidt_ortho_d 702 9.1 11.157 11.157 11.159 11.159 dbcsr_sort_indices 1103 9.9 10.682 10.682 10.682 10.682 tree_to_linear_d 110 9.4 10.656 10.656 10.656 10.656 quick_finalize 395 10.0 0.379 0.379 9.238 9.238 dbcsr_special_finalize 370 9.1 0.002 0.002 8.547 8.547 matrix_qs_to_ls 12 5.1 0.000 0.000 5.587 5.587 matrix_cluster 12 6.1 0.000 0.000 5.587 5.587 create_fast_row_vec_access 776 10.2 0.002 0.002 4.949 4.949 create_fast_row_vec_access_d 776 11.2 4.943 4.943 4.948 4.948 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.021 73.772 73.783 qs_energies 1 2.0 0.000 0.000 73.664 73.664 ls_scf 1 3.0 0.000 0.000 73.603 73.604 ls_scf_main 1 4.0 0.000 0.007 70.912 70.912 density_matrix_trs4 11 5.0 0.006 0.019 67.892 67.996 dbcsr_multiply_generic 185 6.1 0.063 0.092 63.976 64.321 multiply_cannon 185 7.1 0.034 0.040 53.389 54.681 multiply_cannon_loop 185 8.1 0.122 0.154 50.680 51.622 multiply_cannon_multrec 1480 9.1 31.415 36.289 31.713 36.650 mp_waitall_1 11936 10.3 16.864 21.010 16.864 21.010 multiply_cannon_metrocomm3 1480 9.1 0.015 0.018 9.699 16.655 multiply_cannon_metrocomm1 1480 9.1 0.008 0.012 4.108 8.865 make_m2s 370 7.1 0.036 0.041 7.447 7.579 make_images 370 8.1 0.671 0.775 7.309 7.442 calculate_norms 2960 9.1 4.978 6.887 4.978 6.887 make_images_data 370 9.1 0.010 0.020 3.316 3.712 mp_sum_l 1119 5.6 2.128 3.208 2.128 3.208 hybrid_alltoall_any 393 9.9 0.202 0.908 2.869 3.134 arnoldi_extremal 12 6.1 0.000 0.000 2.889 2.915 arnoldi_normal_ev 12 7.1 0.001 0.004 2.889 2.915 build_subspace 23 8.1 0.021 0.027 2.782 2.784 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.651 2.748 dbcsr_matrix_vector_mult 652 9.0 0.010 0.047 2.227 2.473 dbcsr_complete_redistribute 23 7.5 1.306 1.726 2.200 2.383 matrix_ls_to_qs 11 6.0 0.000 0.000 2.139 2.357 matrix_decluster 11 7.0 0.000 0.000 1.990 2.148 dbcsr_matrix_vector_mult_local 652 10.0 1.778 2.123 1.780 2.126 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.207 2.042 make_images_pack 370 9.1 1.769 2.000 1.772 2.004 ls_scf_init_scf 1 4.0 0.000 0.000 1.979 1.980 ls_scf_init_matrix_S 1 5.0 0.000 0.000 1.952 1.958 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.788 1.789 buffer_matrices_ensure_size 370 8.1 1.482 1.743 1.482 1.743 dbcsr_finalize 646 7.5 0.008 0.010 1.404 1.610 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=109.059, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=48.44, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=31.381, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=25.26, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=19.22, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=13.236, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=15.303000000000011, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.306, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=31.415, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.778, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.128, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.978, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=16.864, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 74.029 74.029 lib_test 1 2.0 0.000 0.000 73.923 73.923 dbcsr_run_tests 3 3.0 0.002 0.002 73.922 73.922 test_multiplies_multiproc 3 4.0 0.001 0.001 57.537 57.537 dbcsr_redistribute 9 5.0 37.411 37.411 38.959 38.959 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.938 16.938 dbcsr_make_random_matrix 9 4.0 12.745 12.745 16.286 16.286 multiply_cannon 9 6.0 0.001 0.001 12.204 12.204 multiply_cannon_loop 9 7.0 0.032 0.032 11.840 11.840 multiply_cannon_multrec 9 8.0 11.807 11.807 11.808 11.808 dbcsr_finalize 27 5.7 0.040 0.040 6.150 6.150 dbcsr_merge_all 18 6.5 2.062 2.062 5.445 5.445 dbcsr_data_release 975 7.6 2.555 2.555 2.555 2.555 tree_to_linear_d 9 7.0 1.903 1.903 1.903 1.903 make_m2s 18 6.0 0.001 0.001 1.559 1.559 make_images 18 7.0 0.549 0.549 1.509 1.509 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.002 0.011 21.702 21.707 lib_test 1 2.0 0.000 0.000 21.672 21.691 dbcsr_run_tests 3 3.0 0.000 0.001 21.671 21.690 test_multiplies_multiproc 3 4.0 0.000 0.003 20.750 20.837 dbcsr_multiply_generic 9 5.0 0.001 0.001 18.689 18.790 multiply_cannon 9 6.0 0.002 0.002 16.662 17.159 multiply_cannon_loop 9 7.0 0.002 0.003 16.298 16.713 multiply_cannon_multrec 72 8.0 13.442 14.716 13.443 14.717 mp_waitall_1 576 9.2 3.242 4.373 3.242 4.373 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 2.518 3.825 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.329 1.513 mp_sum_l 390 2.5 0.570 1.239 0.570 1.239 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.564 1.232 dbcsr_make_random_matrix 9 4.0 0.729 1.010 0.891 1.121 make_m2s 18 6.0 0.001 0.001 0.833 0.894 make_images 18 7.0 0.022 0.027 0.829 0.890 dbcsr_finalize 27 5.7 0.000 0.000 0.681 0.791 dbcsr_merge_all 18 6.5 0.113 0.145 0.590 0.703 dbcsr_data_release 444 7.6 0.616 0.678 0.616 0.678 dbcsr_destroy 111 5.9 0.000 0.001 0.527 0.634 dbcsr_checksum 6 5.0 0.180 0.593 0.593 0.593 make_images_data 18 8.0 0.001 0.001 0.444 0.564 dbcsr_redistribute 9 5.0 0.281 0.346 0.515 0.557 hybrid_alltoall_any 18 9.0 0.045 0.218 0.395 0.507 mp_sum_d 191 1.2 0.415 0.437 0.415 0.437 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=7.448999999999998, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=37.411, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.745, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.807, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.555, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.062, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.709000000000003, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.281, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.729, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=13.442, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.616, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.113, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.242, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.57, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.050 0.050 145.935 145.935 qs_mol_dyn_low 1 2.0 0.003 0.003 144.331 144.331 velocity_verlet 5 3.0 0.003 0.003 118.365 118.365 qmmm_el_coupling 6 3.8 0.000 0.000 91.133 91.133 qmmm_elec_with_gaussian 6 4.8 0.065 0.065 91.122 91.122 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 90.431 90.431 qmmm_elec_gaussian_low_G 6 6.8 89.501 89.501 89.501 89.501 qs_forces 6 3.8 0.000 0.000 44.301 44.301 qs_energies 6 4.8 0.000 0.000 39.995 39.995 scf_env_do_scf 6 5.8 0.001 0.001 36.587 36.587 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 25.872 25.872 rebuild_ks_matrix 45 8.4 0.000 0.000 24.936 24.936 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 24.936 24.936 qs_ks_update_qs_env 45 7.8 0.000 0.000 21.072 21.072 pw_transfer 966 12.3 0.048 0.048 17.172 17.172 fft_wrap_pw1pw2 801 13.6 0.006 0.006 16.897 16.897 fft_wrap_pw1pw2_150 507 15.2 2.127 2.127 16.451 16.451 qs_vxc_create 45 10.4 0.001 0.001 13.254 13.254 xc_vxc_pw_create 45 11.4 0.649 0.649 13.253 13.253 init_scf_loop 6 6.8 0.000 0.000 10.711 10.711 xc_pw_derive 270 13.4 0.002 0.002 9.127 9.127 fft3d_s 802 15.6 7.846 7.846 7.853 7.853 prepare_preconditioner 6 7.8 0.000 0.000 7.402 7.402 qs_rho_update_rho 45 7.9 0.000 0.000 7.232 7.232 calculate_rho_elec 45 8.9 0.563 0.563 7.232 7.232 make_preconditioner 6 8.8 0.000 0.000 7.033 7.033 xc_rho_set_and_dset_create 45 12.4 0.660 0.660 6.923 6.923 make_full_all 6 9.8 0.001 0.001 6.570 6.570 xc_pw_divergence 45 12.4 0.001 0.001 5.633 5.633 pw_scatter_s 429 15.8 5.590 5.590 5.590 5.590 qmmm_forces 6 3.8 0.002 0.002 5.501 5.501 qmmm_forces_with_gaussian 6 4.8 0.076 0.076 5.163 5.163 qs_ks_ddapc 45 10.4 0.001 0.001 4.410 4.410 pw_integral_ab 2539 7.4 4.344 4.344 4.344 4.344 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.311 4.311 cp_fm_diag_elpa 18 11.2 0.000 0.000 3.935 3.935 cp_fm_diag_elpa_base 18 12.2 3.925 3.925 3.934 3.934 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.872 3.872 qmmm_forces_gaussian_low_G 6 6.8 3.584 3.584 3.584 3.584 grid_collocate_task_list 45 9.9 3.385 3.385 3.385 3.385 density_rs2pw 45 9.9 0.001 0.001 3.284 3.284 sum_up_and_integrate 45 10.4 0.124 0.124 3.169 3.169 integrate_v_rspace 45 11.4 0.008 0.008 3.044 3.044 pw_poisson_solve 51 9.9 1.257 1.257 3.039 3.039 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.031 0.050 64.775 64.786 qs_mol_dyn_low 1 2.0 0.003 0.005 63.447 63.502 qs_forces 6 3.8 0.001 0.001 46.093 46.093 qs_energies 6 4.8 0.000 0.001 44.116 44.117 scf_env_do_scf 6 5.8 0.000 0.001 43.072 43.072 scf_env_do_scf_inner_loop 113 6.2 0.002 0.019 41.420 41.421 rebuild_ks_matrix 119 8.1 0.000 0.000 30.461 30.471 qs_ks_build_kohn_sham_matrix 119 9.1 0.016 0.022 30.461 30.470 qs_ks_update_qs_env 119 7.3 0.001 0.001 28.785 28.794 velocity_verlet 5 3.0 0.002 0.004 25.359 25.362 pw_transfer 2446 12.3 0.179 0.192 19.413 20.083 fft_wrap_pw1pw2 2059 13.4 0.022 0.025 18.994 19.698 fft_wrap_pw1pw2_150 1321 14.9 1.481 1.811 18.151 18.736 fft3d_ps 2059 15.4 7.413 8.509 14.480 15.756 qs_vxc_create 119 10.1 0.003 0.003 15.472 15.483 xc_vxc_pw_create 119 11.1 0.165 0.240 15.470 15.480 qs_rho_update_rho 119 7.3 0.001 0.001 12.337 12.345 calculate_rho_elec 119 8.3 0.049 0.055 12.336 12.344 xc_pw_derive 714 13.1 0.007 0.011 11.644 12.014 sum_up_and_integrate 119 10.1 0.064 0.088 10.911 11.040 integrate_v_rspace 119 11.1 0.004 0.004 10.846 10.975 rs_pw_transfer 988 11.5 0.012 0.016 8.636 9.108 qmmm_forces 6 3.8 0.002 0.002 8.513 8.513 qmmm_forces_with_gaussian 6 4.8 0.307 0.378 7.645 8.426 xc_rho_set_and_dset_create 119 12.1 0.396 0.641 7.406 8.133 qmmm_el_coupling 6 3.8 0.000 0.000 7.705 7.976 qmmm_elec_with_gaussian 6 4.8 0.320 0.387 7.703 7.974 xc_pw_divergence 119 12.1 0.004 0.005 7.599 7.884 density_rs2pw 119 9.3 0.005 0.008 7.259 7.615 mp_alltoall_z22v 2059 17.4 5.552 7.591 5.552 7.591 potential_pw2rs 119 12.1 0.007 0.009 6.732 6.757 grid_collocate_task_list 119 9.3 4.847 5.428 4.847 5.428 x_to_yz 1095 16.8 0.862 0.993 3.842 4.716 yz_to_x 964 16.0 0.601 0.761 3.173 4.458 mp_waitany 4028 12.8 3.268 4.388 3.268 4.388 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.082 4.387 grid_integrate_task_list 119 12.1 3.770 4.174 3.770 4.174 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.660 3.960 qmmm_forces_gaussian_low_G 6 6.8 3.359 3.666 3.359 3.666 rs_pw_transfer_PW2RS_150 125 13.9 1.310 1.667 3.422 3.531 qmmm_elec_gaussian_low_G 6 6.8 3.052 3.331 3.052 3.331 rs_pw_transfer_RS2PW_150 125 11.2 0.920 1.232 2.707 3.191 pw_restrict_s3 18 5.8 1.400 1.742 2.550 2.975 mp_waitall_1 188862 16.2 2.256 2.686 2.256 2.686 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.295 2.547 pw_prolongate_s3 18 6.8 1.219 1.458 2.295 2.547 dbcsr_multiply_generic 2588 12.3 0.061 0.076 2.261 2.416 qs_scf_new_mos 113 7.2 0.001 0.001 2.310 2.325 qs_scf_loop_do_ot 113 8.2 0.000 0.001 2.309 2.324 ot_scf_mini 113 9.2 0.001 0.001 2.216 2.222 mp_sum_dm3 33 5.7 2.053 2.212 2.053 2.212 qs_ks_ddapc 119 10.1 0.002 0.002 2.088 2.207 mp_sum_d 5820 12.2 1.287 1.950 1.287 1.950 pw_integral_ab 2761 7.7 1.134 1.337 1.629 1.805 pw_scatter_p 1095 15.8 1.621 1.735 1.621 1.735 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.689 1.690 pw_gather_p 964 15.0 1.356 1.657 1.356 1.657 init_scf_loop 6 6.8 0.000 0.000 1.649 1.650 ot_mini 113 10.2 0.001 0.001 1.363 1.369 mp_sum_dm 514 5.2 0.891 1.364 0.891 1.364 rs_pw_transfer_PW2RS_40 119 14.1 0.240 0.302 1.196 1.342 pw_copy 1670 12.6 1.099 1.302 1.099 1.302 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=27.75999999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=89.501, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.846, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.59, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.344, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=3.925, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.584, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.385, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=35.64800000000001, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=3.052, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.134, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.359, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.847, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.77, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.552, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.413, yerr=0.0 Summary: Performance test took 45 minutes. Status: OK Removing intermediate container a0841c9ff5d8 ---> edaaafb2d58a Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in ae55881b7bdb Removing intermediate container ae55881b7bdb ---> 9a12fa7cec24 Step 42/42 : ENTRYPOINT [] ---> Running in e2e3f89c16ca Removing intermediate container e2e3f89c16ca ---> 9a83019a4235 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 9a83019a4235 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-06-08 12:18:14+00:00