StartDate: 2022-08-17 19:05:58+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: da366e26351cbdc79f7d4328dfd7b793bf23b9fb CommitTime: 2022-08-17 17:04:37 +0200 CommitAuthor: Matthias Krack CommitSubject: Adjust tolerance Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=da366e26351cbdc79f7d4328dfd7b793bf23b9fb Sending build context to Docker daemon 364.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu d19f32bd9e41: Already exists Digest: sha256:34fea4f31bf187bc915536831fd0afc9d214755bf700b5cdb1336c82516d154e Status: Downloaded newer image for ubuntu:22.04 ---> df5de72bdb3b Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 026e35f2a85c Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 61e4cd54df66 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 9302e3cfee49 Step 5/42 : RUN mkdir scripts ---> Using cache ---> ba1db08844ca Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 4fa59dbbe3c5 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 92a7ebae54e4 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> fdcb02913a6b Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> bbf5a70e57e9 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 57713487903f Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> a7273f099530 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 5f5e186a6792 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> daaf389ae447 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 6767e014fd7d Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 8bd37651db46 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 7036e597e5e8 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> ff2b1e1cb16d Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> da0abcd2ccad Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> ea1516b197ec Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> c5c4ef116433 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 0e2954296d9e Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> d0b0ced73c5c Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> cc25e8fd9377 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 915029cf8ed8 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 20510d2c9104 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 55a50ff28104 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 02b1fc9204dc Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 67cf34b67198 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 74e513362a2c Step 30/42 : COPY ./Makefile . ---> Using cache ---> a24b1f25fab5 Step 31/42 : COPY ./src ./src ---> Using cache ---> 16fd990e3bf0 Step 32/42 : COPY ./exts ./exts ---> Using cache ---> e2ec86de0646 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> f4aa9f0fc203 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Using cache ---> b157dfe2269c Step 35/42 : COPY ./data ./data ---> Using cache ---> d53ff35bfa2f Step 36/42 : COPY ./tests ./tests ---> 44c33230dfd2 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 42f5a9a52274 Step 38/42 : COPY ./benchmarks ./benchmarks ---> 694dfcc3076d Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 28632e9f9b6f Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 54cd600670eb ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.031 0.031 104.725 104.725 qs_mol_dyn_low 1 2.0 0.003 0.003 104.081 104.081 qs_forces 11 3.9 0.001 0.001 104.039 104.039 qs_energies 11 4.9 0.001 0.001 97.178 97.178 scf_env_do_scf 11 5.9 0.001 0.001 84.845 84.845 scf_env_do_scf_inner_loop 108 6.5 0.014 0.014 65.820 65.820 velocity_verlet 10 3.0 0.002 0.002 65.625 65.625 qs_scf_new_mos 108 7.5 0.001 0.001 25.611 25.611 qs_scf_loop_do_ot 108 8.5 0.001 0.001 25.610 25.610 ot_scf_mini 108 9.5 0.003 0.003 23.872 23.872 rebuild_ks_matrix 119 8.3 0.001 0.001 23.531 23.531 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.015 23.530 23.530 dbcsr_multiply_generic 2286 12.5 0.183 0.183 23.259 23.259 qs_rho_update_rho_low 119 7.7 0.001 0.001 22.960 22.960 calculate_rho_elec 119 8.7 0.997 0.997 22.959 22.959 qs_ks_update_qs_env 119 7.6 0.001 0.001 21.625 21.625 init_scf_loop 11 6.9 0.000 0.000 18.889 18.889 grid_collocate_task_list 119 9.7 17.602 17.602 17.602 17.602 prepare_preconditioner 11 7.9 0.000 0.000 16.158 16.158 make_preconditioner 11 8.9 0.000 0.000 16.158 16.158 make_full_inverse_cholesky 11 9.9 0.000 0.000 14.933 14.933 sum_up_and_integrate 119 10.3 0.249 0.249 14.577 14.577 integrate_v_rspace 119 11.3 0.115 0.115 14.328 14.328 ot_mini 108 10.5 0.001 0.001 13.618 13.618 make_m2s 4572 13.5 0.051 0.051 12.426 12.426 grid_integrate_task_list 119 12.3 11.832 11.832 11.832 11.832 pw_transfer 1439 11.6 0.074 0.074 7.556 7.556 qs_ot_get_derivative 108 11.5 0.001 0.001 7.372 7.372 fft_wrap_pw1pw2 1201 12.6 0.008 0.008 7.247 7.247 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.146 7.146 dbcsr_make_dense_low 5837 15.5 0.084 0.084 6.563 6.563 make_dense_data 5837 16.5 5.676 5.676 6.464 6.464 make_images 4572 14.5 2.342 2.342 6.338 6.338 multiply_cannon 2286 13.5 0.216 0.216 6.333 6.333 ot_diis_step 108 11.5 0.004 0.004 6.242 6.242 fft_wrap_pw1pw2_140 487 13.2 0.502 0.502 6.220 6.220 qs_ot_get_p 119 10.4 0.001 0.001 6.055 6.055 multiply_cannon_loop 2286 14.5 0.176 0.176 5.784 5.784 cp_fm_cholesky_decompose 22 10.9 5.741 5.741 5.741 5.741 dbcsr_make_images_dense 3978 14.8 0.020 0.020 5.654 5.654 multiply_cannon_multrec 2286 15.5 5.542 5.542 5.607 5.607 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.604 5.604 apply_single 119 13.6 0.001 0.001 5.604 5.604 cp_fm_cholesky_invert 11 10.9 4.852 4.852 4.852 4.852 qs_ot_p2m_diag 50 11.0 0.156 0.156 4.538 4.538 init_scf_run 11 5.9 0.002 0.002 4.470 4.470 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.468 4.468 dbcsr_copy 2102 12.0 0.261 0.261 4.372 4.372 density_rs2pw 119 9.7 0.005 0.005 4.360 4.360 dbcsr_complete_redistribute 329 12.2 2.161 2.161 4.120 4.120 cp_dbcsr_syevd 50 12.0 0.003 0.003 4.078 4.078 dbcsr_copy_into_existing 22 7.9 4.056 4.056 4.057 4.057 cp_fm_diag_elpa 50 13.0 0.000 0.000 3.919 3.919 cp_fm_diag_elpa_base 50 14.0 3.855 3.855 3.919 3.919 wfi_extrapolate 11 7.9 0.001 0.001 3.883 3.883 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.867 3.867 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.613 3.613 fft3d_s 1202 14.6 3.554 3.554 3.559 3.559 copy_dbcsr_to_fm 153 11.3 0.003 0.003 3.326 3.326 qs_create_task_list 11 7.9 0.000 0.000 3.194 3.194 generate_qs_task_list 11 8.9 2.162 2.162 3.194 3.194 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 3.072 3.072 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.991 2.991 pw_poisson_solve 119 10.3 1.120 1.120 2.805 2.805 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.488 2.488 potential_pw2rs 119 12.3 0.051 0.051 2.381 2.381 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 2.351 2.351 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.321 2.321 hybrid_alltoall_any 4725 16.4 1.938 1.938 2.241 2.241 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.025 58.369 58.378 qs_mol_dyn_low 1 2.0 0.004 0.007 58.251 58.256 qs_forces 11 3.9 0.001 0.002 58.193 58.194 qs_energies 11 4.9 0.001 0.001 54.165 54.166 scf_env_do_scf 11 5.9 0.001 0.002 49.538 49.539 scf_env_do_scf_inner_loop 108 6.5 0.003 0.024 45.770 45.770 velocity_verlet 10 3.0 0.002 0.003 34.779 34.780 rebuild_ks_matrix 119 8.3 0.001 0.001 22.434 22.540 qs_ks_build_kohn_sham_matrix 119 9.3 0.017 0.024 22.433 22.540 qs_ks_update_qs_env 119 7.6 0.001 0.001 19.968 20.064 dbcsr_multiply_generic 2286 12.5 0.085 0.101 16.060 17.955 sum_up_and_integrate 119 10.3 0.024 0.029 16.938 17.167 integrate_v_rspace 119 11.3 0.005 0.006 16.914 17.146 qs_rho_update_rho_low 119 7.7 0.001 0.001 16.716 16.733 calculate_rho_elec 119 8.7 0.032 0.045 16.716 16.732 qs_scf_new_mos 108 7.5 0.001 0.001 12.720 12.815 qs_scf_loop_do_ot 108 8.5 0.001 0.001 12.720 12.815 grid_integrate_task_list 119 12.3 9.460 12.441 9.460 12.441 grid_collocate_task_list 119 9.7 10.421 12.435 10.421 12.435 ot_scf_mini 108 9.5 0.002 0.003 11.904 11.982 multiply_cannon 2286 13.5 0.153 0.185 11.119 11.513 multiply_cannon_loop 2286 14.5 0.105 0.136 10.340 10.739 mp_waitall_1 169478 16.3 9.515 10.098 9.515 10.098 rs_pw_transfer 974 11.9 0.013 0.015 7.142 7.806 ot_mini 108 10.5 0.001 0.001 6.973 7.057 multiply_cannon_metrocomm3 18288 15.5 0.042 0.058 6.030 6.610 density_rs2pw 119 9.7 0.006 0.007 5.815 6.462 potential_pw2rs 119 12.3 0.007 0.008 4.297 4.312 pw_transfer 1439 11.6 0.093 0.102 4.191 4.252 fft_wrap_pw1pw2 1201 12.6 0.010 0.013 4.014 4.072 mp_alltoall_d11v 2130 13.8 3.379 4.051 3.379 4.051 mp_waitany 9880 13.7 3.270 4.037 3.270 4.037 rs_gather_matrices 119 12.3 0.119 0.135 3.108 3.826 init_scf_loop 11 6.9 0.000 0.000 3.752 3.753 multiply_cannon_multrec 18288 15.5 3.318 3.706 3.330 3.722 qs_ot_get_derivative 108 11.5 0.001 0.001 3.643 3.718 rs_pw_transfer_RS2PW_140 130 11.5 0.352 0.397 2.877 3.546 fft_wrap_pw1pw2_140 487 13.2 0.345 0.381 3.299 3.364 mp_sum_l 11218 13.2 1.387 3.303 1.387 3.303 ot_diis_step 108 11.5 0.004 0.005 3.285 3.286 init_scf_run 11 5.9 0.000 0.005 3.238 3.238 scf_env_initial_rho_setup 11 6.9 0.000 0.004 3.237 3.238 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.087 3.189 apply_single 119 13.6 0.000 0.000 3.086 3.188 make_m2s 4572 13.5 0.052 0.064 3.029 3.122 fft3d_ps 1201 14.6 1.420 1.516 2.951 3.024 wfi_extrapolate 11 7.9 0.001 0.001 2.908 2.908 make_images 4572 14.5 0.132 0.158 2.656 2.742 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.656 2.673 rs_pw_transfer_PW2RS_140 130 13.9 0.699 0.773 1.837 1.886 qs_ot_get_p 119 10.4 0.001 0.001 1.630 1.730 mp_sum_d 4129 12.0 1.336 1.701 1.336 1.701 make_images_data 4572 15.5 0.039 0.049 1.362 1.498 prepare_preconditioner 11 7.9 0.000 0.000 1.391 1.408 make_preconditioner 11 8.9 0.000 0.000 1.391 1.408 mp_alltoall_z22v 1201 16.6 1.209 1.400 1.209 1.400 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.001 0.986 1.326 hybrid_alltoall_any 4725 16.4 0.077 0.275 1.187 1.311 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.262 1.283 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 1.190 1.238 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.169 1.221 qs_energies_init_hamiltonians 11 5.9 0.001 0.001 1.171 1.172 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=58.331999999999994, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=17.602, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.832, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=5.741, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.676, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.542, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=22.276000000000003, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=10.421, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=9.46, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.318, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=9.515, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.379, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.030 0.030 132.139 132.139 qs_mol_dyn_low 1 2.0 0.003 0.003 131.456 131.456 qs_forces 11 3.9 0.001 0.001 131.413 131.413 qs_energies 11 4.9 0.001 0.001 122.725 122.725 scf_env_do_scf 11 5.9 0.002 0.002 108.131 108.131 scf_env_do_scf_inner_loop 96 6.5 0.012 0.012 85.985 85.985 velocity_verlet 10 3.0 0.002 0.002 83.537 83.537 rebuild_ks_matrix 107 8.3 0.001 0.001 38.511 38.511 qs_ks_build_kohn_sham_matrix 107 9.3 0.014 0.014 38.510 38.510 qs_rho_update_rho_low 107 7.7 0.001 0.001 36.822 36.822 calculate_rho_elec 107 8.7 0.890 0.890 36.821 36.821 qs_ks_update_qs_env 107 7.6 0.001 0.001 34.696 34.696 grid_collocate_task_list 107 9.7 31.947 31.947 31.947 31.947 sum_up_and_integrate 107 10.3 0.215 0.215 29.964 29.964 integrate_v_rspace 107 11.3 0.141 0.141 29.749 29.749 grid_integrate_task_list 107 12.3 27.470 27.470 27.470 27.470 qs_scf_new_mos 96 7.5 0.001 0.001 22.208 22.208 qs_scf_loop_do_ot 96 8.5 0.001 0.001 22.207 22.207 init_scf_loop 11 6.9 0.000 0.000 21.947 21.947 ot_scf_mini 96 9.5 0.003 0.003 20.732 20.732 dbcsr_multiply_generic 1966 12.4 0.165 0.165 20.650 20.650 prepare_preconditioner 11 7.9 0.000 0.000 17.355 17.355 make_preconditioner 11 8.9 0.000 0.000 17.355 17.355 make_full_inverse_cholesky 11 9.9 0.000 0.000 16.152 16.152 ot_mini 96 10.5 0.001 0.001 11.778 11.778 make_m2s 3932 13.4 0.044 0.044 11.032 11.032 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.494 7.494 pw_transfer 1295 11.6 0.067 0.067 6.961 6.961 fft_wrap_pw1pw2 1081 12.6 0.007 0.007 6.677 6.677 qs_ot_get_derivative 96 11.5 0.001 0.001 6.438 6.438 cp_fm_cholesky_decompose 22 10.9 6.428 6.428 6.428 6.428 init_scf_run 11 5.9 0.002 0.002 6.306 6.306 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.304 6.304 fft_wrap_pw1pw2_140 439 13.2 0.530 0.530 5.777 5.777 dbcsr_make_dense_low 4961 15.5 0.085 0.085 5.711 5.711 make_images 3932 14.4 2.111 2.111 5.703 5.703 multiply_cannon 1966 13.4 0.184 0.184 5.661 5.661 make_dense_data 4961 16.5 4.918 4.918 5.613 5.613 wfi_extrapolate 11 7.9 0.001 0.001 5.512 5.512 ot_diis_step 96 11.5 0.004 0.004 5.336 5.336 cp_fm_cholesky_invert 11 10.9 5.320 5.320 5.320 5.320 qs_ot_get_p 107 10.4 0.001 0.001 5.270 5.270 multiply_cannon_loop 1966 14.4 0.128 0.128 5.192 5.192 multiply_cannon_multrec 1966 15.4 5.007 5.007 5.063 5.063 dbcsr_make_images_dense 3386 14.7 0.017 0.017 4.957 4.957 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.849 4.849 apply_single 107 13.6 0.000 0.000 4.849 4.849 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.803 4.803 dbcsr_copy 1855 11.9 0.260 0.260 4.343 4.343 dbcsr_complete_redistribute 317 12.2 2.071 2.071 4.240 4.240 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.004 4.004 dbcsr_copy_into_existing 22 7.9 3.984 3.984 3.985 3.985 density_rs2pw 107 9.7 0.004 0.004 3.984 3.984 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.882 3.882 qs_ot_p2m_diag 44 11.0 0.137 0.137 3.881 3.881 qs_create_task_list 11 7.9 0.000 0.000 3.555 3.555 generate_qs_task_list 11 8.9 2.440 2.440 3.555 3.555 cp_dbcsr_syevd 44 12.0 0.003 0.003 3.497 3.497 copy_dbcsr_to_fm 147 11.2 0.003 0.003 3.442 3.442 cp_fm_diag_elpa 44 13.0 0.000 0.000 3.315 3.315 cp_fm_diag_elpa_base 44 14.0 3.256 3.256 3.315 3.315 fft3d_s 1082 14.6 3.241 3.241 3.252 3.252 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 3.005 3.005 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.026 95.128 95.138 qs_mol_dyn_low 1 2.0 0.004 0.005 95.017 95.022 qs_forces 11 3.9 0.001 0.002 94.971 94.972 qs_energies 11 4.9 0.001 0.001 88.507 88.512 scf_env_do_scf 11 5.9 0.001 0.002 82.103 82.103 scf_env_do_scf_inner_loop 96 6.5 0.003 0.020 76.108 76.114 velocity_verlet 10 3.0 0.002 0.004 57.037 57.038 rebuild_ks_matrix 107 8.3 0.001 0.001 41.786 41.874 qs_ks_build_kohn_sham_matrix 107 9.3 0.015 0.018 41.786 41.874 sum_up_and_integrate 107 10.3 0.021 0.024 36.900 36.926 qs_ks_update_qs_env 107 7.6 0.001 0.001 36.838 36.913 integrate_v_rspace 107 11.3 0.004 0.005 36.879 36.905 qs_rho_update_rho_low 107 7.7 0.001 0.001 35.210 35.223 calculate_rho_elec 107 8.7 0.029 0.039 35.209 35.222 grid_integrate_task_list 107 12.3 25.593 29.646 25.593 29.646 grid_collocate_task_list 107 9.7 24.851 28.555 24.851 28.555 dbcsr_multiply_generic 1966 12.4 0.074 0.082 14.909 15.091 rs_pw_transfer 878 11.9 0.012 0.014 11.121 12.197 qs_scf_new_mos 96 7.5 0.001 0.001 11.787 11.884 qs_scf_loop_do_ot 96 8.5 0.001 0.001 11.787 11.883 ot_scf_mini 96 9.5 0.002 0.002 11.068 11.158 density_rs2pw 107 9.7 0.005 0.006 9.908 10.979 multiply_cannon 1966 13.4 0.132 0.156 10.334 10.815 multiply_cannon_loop 1966 14.4 0.096 0.124 9.608 9.914 mp_waitall_1 146670 16.2 8.925 9.493 8.925 9.493 mp_alltoall_d11v 1998 13.7 7.551 8.766 7.551 8.766 mp_waitany 8968 13.7 7.579 8.631 7.579 8.631 rs_gather_matrices 107 12.3 0.114 0.125 7.289 8.486 rs_pw_transfer_RS2PW_140 118 11.5 0.287 0.315 7.187 8.244 ot_mini 96 10.5 0.001 0.003 6.560 6.660 multiply_cannon_metrocomm3 15728 15.4 0.038 0.048 5.655 6.487 init_scf_loop 11 6.9 0.000 0.000 5.974 5.979 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 5.106 5.135 init_scf_run 11 5.9 0.000 0.005 5.029 5.029 scf_env_initial_rho_setup 11 6.9 0.000 0.004 5.029 5.029 wfi_extrapolate 11 7.9 0.001 0.001 4.573 4.573 potential_pw2rs 107 12.3 0.006 0.007 3.949 3.972 pw_transfer 1295 11.6 0.083 0.096 3.846 3.899 fft_wrap_pw1pw2 1081 12.6 0.009 0.010 3.687 3.754 qs_ot_get_derivative 96 11.5 0.001 0.001 3.568 3.661 multiply_cannon_multrec 15728 15.4 3.036 3.339 3.046 3.352 fft_wrap_pw1pw2_140 439 13.2 0.305 0.341 2.983 3.064 ot_diis_step 96 11.5 0.004 0.004 2.943 2.943 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.859 2.937 apply_single 107 13.6 0.000 0.001 2.859 2.937 make_m2s 3932 13.4 0.046 0.051 2.832 2.908 fft3d_ps 1081 14.6 1.267 1.359 2.727 2.814 make_images 3932 14.4 0.117 0.132 2.500 2.579 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=55.96700000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=31.947, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.47, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.428, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=5.32, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.007, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=17.593000000000004, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=24.851, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=25.593, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.036, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.925, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=7.551, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=7.579, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.191 0.191 122.475 122.475 qs_energies 1 2.0 0.000 0.000 121.616 121.616 scf_env_do_scf 1 3.0 0.000 0.000 120.324 120.324 qs_ks_update_qs_env 8 5.0 0.000 0.000 114.439 114.439 rebuild_ks_matrix 7 6.0 0.000 0.000 114.375 114.375 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 114.375 114.375 hfx_ks_matrix 7 8.0 0.000 0.000 104.166 104.166 integrate_four_center 7 9.0 1.653 1.653 104.142 104.142 integrate_four_center_main 7 10.0 0.713 0.713 91.691 91.691 integrate_four_center_bin 458 11.0 90.978 90.978 90.978 90.978 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 63.877 63.877 init_scf_loop 1 4.0 0.000 0.000 56.434 56.434 integrate_four_center_load 7 10.0 0.000 0.000 10.450 10.450 hfx_load_balance 1 11.0 0.016 0.016 10.450 10.450 hfx_load_balance_count 1 12.0 5.887 5.887 5.887 5.887 hfx_load_balance_bin 1 12.0 4.530 4.530 4.530 4.530 qs_vxc_create 14 8.0 0.000 0.000 3.945 3.945 xc_vxc_pw_create 14 9.0 0.122 0.122 3.944 3.944 xc_rho_set_and_dset_create 14 10.0 0.116 0.116 3.098 3.098 calculate_rho_elec 15 7.4 0.126 0.126 2.852 2.852 prepare_preconditioner 1 5.0 0.000 0.000 2.687 2.687 make_preconditioner 1 6.0 0.000 0.000 2.687 2.687 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 2.479 2.479 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.216 0.244 149.504 149.518 qs_energies 1 2.0 0.000 0.000 149.150 149.151 scf_env_do_scf 1 3.0 0.000 0.000 148.713 148.714 qs_ks_update_qs_env 8 5.0 0.000 0.000 146.079 146.080 rebuild_ks_matrix 7 6.0 0.000 0.000 146.069 146.069 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 146.069 146.069 hfx_ks_matrix 7 8.0 0.000 0.001 138.572 138.574 integrate_four_center 7 9.0 0.064 0.402 138.559 138.562 integrate_four_center_main 7 10.0 0.004 0.005 90.892 125.032 integrate_four_center_bin 448 11.0 90.888 125.027 90.888 125.027 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 84.598 84.598 init_scf_loop 1 4.0 0.000 0.000 64.113 64.113 mp_sync 70 11.3 34.182 36.886 34.182 36.886 integrate_four_center_load 7 10.0 0.000 0.000 12.703 12.707 hfx_load_balance 1 11.0 0.001 0.001 12.702 12.707 mp_sum_l 1135 8.3 6.419 6.752 6.419 6.752 hfx_load_balance_dist 1 12.0 0.000 0.000 6.254 6.579 hfx_load_balance_bin 1 12.0 3.195 6.394 3.195 6.394 hfx_load_balance_count 1 12.0 3.166 6.220 3.166 6.220 qs_vxc_create 14 8.0 0.000 0.001 3.452 3.452 xc_vxc_pw_create 14 9.0 0.010 0.012 3.451 3.452 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=18.714, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=90.978, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.887, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.53, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.653, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.713, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=11.585999999999984, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=90.888, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.166, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.195, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.064, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=6.419, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=34.182, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.017 91.835 91.835 qs_energies 1 2.0 0.000 0.000 91.416 91.416 mp2_main 1 3.0 0.000 0.000 87.461 87.461 mp2_gpw_main 1 4.0 0.000 0.000 87.287 87.287 rpa_ri_compute_en 1 5.0 0.000 0.000 83.502 83.502 rpa_num_int 1 6.0 0.001 0.001 83.494 83.494 compute_mat_P_omega 1 7.0 0.003 0.003 71.035 71.035 compute_mat_P_omega_contract 10 8.0 9.301 9.301 70.808 70.808 dbt_total 2336 9.6 0.013 0.013 57.698 57.698 dbt_contract 787 11.0 0.039 0.039 49.889 49.889 dbt_tas_total 1149 12.2 0.247 0.247 48.243 48.243 dbt_tas_multiply 807 12.1 0.002 0.002 46.858 46.858 dbt_tas_dbm 807 14.1 0.004 0.004 39.771 39.771 dbm_multiply 807 16.1 39.759 39.759 39.759 39.759 dbt_tas_mm_1N 524 15.1 0.002 0.002 26.493 26.493 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 23.764 23.764 compute_mat_P_omega_calc_M_occ 250 9.0 9.227 9.227 19.259 19.259 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 12.301 12.301 dbt_tas_mm_2 251 15.0 0.002 0.002 11.172 11.172 compute_QP_energies 1 7.0 0.000 0.000 6.695 6.695 compute_self_energy_cubic_gw 1 8.0 0.050 0.050 6.694 6.694 dbt_copy 1103 10.7 0.109 0.109 6.404 6.404 contract_cubic_gw 21 9.0 0.000 0.000 5.511 5.511 dbt_tas_reserve_blocks_index 3261 14.3 0.159 0.159 3.804 3.804 scf_env_do_scf 1 3.0 0.000 0.000 3.803 3.803 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 3.803 3.803 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.776 3.776 dbm_reserve_blocks 3628 15.3 3.733 3.733 3.733 3.733 dbt_crop 1042 12.0 1.880 1.880 2.937 2.937 dbt_tas_copy 574 11.4 1.740 1.740 2.830 2.830 dbt_reserve_blocks_index 2280 13.1 0.064 0.064 2.774 2.774 dbt_reserve_blocks_index_array 2222 12.2 0.012 0.012 2.766 2.766 convert_to_new_pgrid 2421 14.1 0.154 0.154 2.541 2.541 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.495 2.495 dbm_copy 1614 15.1 2.387 2.387 2.387 2.387 compute_W_cubic_GW 10 7.0 0.005 0.005 2.360 2.360 dbt_reshape 278 11.9 1.152 1.152 2.090 2.090 qs_scf_new_mos 17 5.0 0.000 0.000 2.071 2.071 get_2c_integrals 1 6.0 0.000 0.000 2.042 2.042 dbt_tas_mm_3N 22 15.1 0.000 0.000 1.997 1.997 dbt_tas_reshape 367 15.0 0.007 0.007 1.975 1.975 cp_fm_cholesky_decompose 14 8.1 1.958 1.958 1.958 1.958 eigensolver 18 5.9 0.001 0.001 1.840 1.840 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.022 40.322 40.333 qs_energies 1 2.0 0.000 0.001 40.215 40.217 mp2_main 1 3.0 0.001 0.009 39.096 39.097 mp2_gpw_main 1 4.0 0.000 0.001 39.047 39.049 rpa_ri_compute_en 1 5.0 0.000 0.000 37.571 37.572 rpa_num_int 1 6.0 0.000 0.002 37.570 37.571 dbt_total 2336 9.6 0.014 0.016 33.390 33.415 compute_mat_P_omega 1 7.0 0.001 0.006 32.045 32.057 compute_mat_P_omega_contract 10 8.0 0.495 0.551 31.866 31.870 dbt_contract 787 11.0 0.032 0.037 24.510 24.521 dbt_tas_total 1149 12.2 0.064 0.080 21.917 21.918 dbt_tas_multiply 807 12.1 0.002 0.003 21.834 21.837 dbt_tas_dbm 807 14.1 0.004 0.005 15.988 16.030 dbm_multiply 807 16.1 12.149 13.150 12.149 13.150 compute_mat_P_omega_calc_M_occ 250 9.0 0.463 0.521 9.591 9.591 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.156 9.157 mp_sync 8706 11.6 6.866 7.836 6.866 7.836 dbt_copy 1111 10.7 0.015 0.016 7.296 7.588 dbt_tas_mm_2 251 15.0 0.002 0.002 7.432 7.445 dbt_reshape 1098 11.7 2.643 3.249 6.952 7.196 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 6.596 6.597 dbt_tas_mm_1N 524 15.1 0.002 0.002 5.683 6.240 mp_waitall_2 3776 15.3 3.367 3.590 3.367 3.590 compute_QP_energies 1 7.0 0.000 0.000 3.570 3.571 dbt_communicate_buffer 1098 12.7 0.059 0.083 3.424 3.571 compute_self_energy_cubic_gw 1 8.0 0.003 0.007 3.567 3.570 contract_cubic_gw 21 9.0 0.000 0.000 2.765 2.765 dbt_reserve_blocks_index 2849 13.1 0.073 0.091 2.037 2.188 dbt_reserve_blocks_index_array 2791 12.2 0.010 0.012 2.036 2.187 dbt_tas_reserve_blocks_index 3300 14.5 0.125 0.163 2.009 2.159 dbm_reserve_blocks 3696 15.4 1.998 2.142 1.998 2.142 dbt_crop 1042 12.0 1.045 1.380 1.693 2.077 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.472 1.474 dbt_tas_replicate 396 14.1 0.613 0.800 1.299 1.465 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.301 1.313 convert_to_new_pgrid 2421 14.1 0.030 0.040 1.052 1.220 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.201 1.204 dbm_copy 1608 15.1 1.014 1.184 1.014 1.184 cp_gemm 105 8.4 0.000 0.000 1.091 1.100 cp_gemm_cosma 105 9.4 1.090 1.100 1.090 1.100 scf_env_do_scf 1 3.0 0.000 0.000 1.076 1.076 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 1.076 1.076 mp_max_i 1992 9.8 0.821 1.005 0.821 1.005 dbm_add 807 14.1 0.848 0.907 0.848 0.907 compute_W_cubic_GW 10 7.0 0.001 0.001 0.858 0.866 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=26.275999999999996, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=39.759, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=9.301, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=9.227, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.733, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.387, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.152, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=11.327000000000005, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=12.149, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.495, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.463, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.998, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=1.014, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.643, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.866, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.367, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 302.879 302.879 qs_forces 1 2.0 0.000 0.000 302.266 302.266 rebuild_ks_matrix 7 6.6 0.000 0.000 300.726 300.726 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 300.726 300.726 hfx_ks_matrix 7 8.6 0.000 0.000 298.638 298.638 dbt_total 4861 11.6 0.032 0.032 248.141 248.141 hfx_ri_update_ks 7 9.6 0.000 0.000 247.546 247.546 hfx_ri_update_ks_Pmat 7 10.6 33.847 33.847 247.543 247.543 qs_energies 1 3.0 0.000 0.000 220.306 220.306 scf_env_do_scf 1 4.0 0.000 0.000 219.998 219.998 qs_ks_update_qs_env 8 6.0 0.000 0.000 218.819 218.819 dbt_tas_total 2391 14.1 1.108 1.108 217.207 217.207 dbt_contract 1473 13.0 0.167 0.167 199.064 199.064 dbt_tas_multiply 1482 14.0 0.004 0.004 187.552 187.552 hfx_ri_update_ks_Pmat_KS 567 11.6 0.005 0.005 176.675 176.675 dbt_tas_dbm 1482 16.0 0.007 0.007 165.327 165.327 dbm_multiply 1482 18.0 165.306 165.306 165.306 165.306 dbt_tas_mm_2 649 17.1 0.005 0.005 147.157 147.157 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 133.967 133.967 init_scf_loop 2 5.0 0.000 0.000 86.029 86.029 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 81.910 81.910 hfx_ri_update_forces 1 7.0 0.000 0.000 51.089 51.089 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 34.575 34.575 dbt_tas_reshape 906 14.4 0.011 0.011 21.979 21.979 dbt_copy 2331 12.4 0.241 0.241 19.376 19.376 dbt_tas_merge 649 14.1 12.837 12.837 14.154 14.154 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 13.633 13.633 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 13.576 13.576 precalc_derivatives 1 8.0 0.007 0.007 13.236 13.236 dbt_tas_reshape_buffer_fill 906 15.4 12.609 12.609 12.609 12.609 dbm_reserve_blocks 8303 16.8 11.785 11.785 11.785 11.785 dbt_tas_mm_3T 659 17.1 0.002 0.002 11.734 11.734 dbt_tas_reserve_blocks_index 7397 16.0 0.365 0.365 11.261 11.261 dbt_crop 2763 14.2 7.839 7.839 11.253 11.253 dbt_reshape 856 13.9 6.195 6.195 10.753 10.753 dbt_reserve_blocks_index 4998 15.2 0.143 0.143 8.567 8.567 dbt_reserve_blocks_index_array 4963 14.3 0.022 0.022 8.511 8.511 build_3c_derivatives 9 9.0 2.387 2.387 7.464 7.464 dbt_tas_reshape_buffer_obtain 906 15.4 6.382 6.382 7.152 7.152 reshape_mm_small 906 15.6 0.141 0.141 6.764 6.764 dbt_tas_replicate 906 15.6 5.129 5.129 6.561 6.561 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.024 69.804 69.815 qs_forces 1 2.0 0.000 0.000 69.602 69.602 rebuild_ks_matrix 7 6.6 0.000 0.000 68.793 68.794 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 68.793 68.794 hfx_ks_matrix 7 8.6 0.001 0.002 67.515 67.523 dbt_total 4861 11.6 0.033 0.036 61.116 61.126 dbt_contract 1473 13.0 0.115 0.125 46.856 46.869 hfx_ri_update_ks 7 9.6 0.000 0.000 46.236 46.236 hfx_ri_update_ks_Pmat 7 10.6 1.651 2.166 46.234 46.234 dbt_tas_total 2391 14.1 0.141 0.168 43.808 43.812 qs_energies 1 3.0 0.000 0.000 42.115 42.116 scf_env_do_scf 1 4.0 0.000 0.001 41.947 41.947 qs_ks_update_qs_env 8 6.0 0.000 0.000 41.323 41.325 dbt_tas_multiply 1482 14.0 0.006 0.007 39.258 39.261 dbt_tas_dbm 1482 16.0 0.006 0.007 29.738 29.774 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 27.472 27.472 hfx_ri_update_ks_Pmat_KS 567 11.6 0.005 0.005 25.316 25.319 dbm_multiply 1482 18.0 19.831 25.054 19.831 25.054 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 24.408 24.408 hfx_ri_update_forces 1 7.0 0.000 0.001 21.278 21.287 mp_sync 17513 13.6 15.410 18.220 15.410 18.220 dbt_tas_mm_2 649 17.1 0.004 0.005 17.965 17.979 init_scf_loop 2 5.0 0.000 0.000 17.537 17.538 hfx_ri_forces_Pmat_3c 1 8.0 0.003 0.007 14.734 14.753 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.003 0.003 9.424 9.424 dbt_copy 2349 12.4 0.039 0.044 7.944 8.365 dbt_reshape 1256 13.5 2.554 2.851 5.568 5.821 dbt_crop 2763 14.2 3.421 4.191 4.326 5.130 precalc_derivatives 1 8.0 0.002 0.003 4.985 4.985 dbt_tas_mm_3T 659 17.1 0.002 0.002 4.552 4.870 dbt_tas_mm_3N 163 16.5 0.001 0.001 4.716 4.788 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.001 4.646 4.646 mp_waitall_2 5988 16.5 3.442 3.719 3.442 3.719 dbt_tas_merge 649 14.1 1.751 2.255 3.060 3.547 dbm_reserve_blocks 8337 16.9 2.763 3.156 2.763 3.156 dbt_tas_reserve_blocks_index 7428 16.1 0.276 0.344 2.643 3.063 mp_max_i 3372 12.5 2.337 2.779 2.337 2.779 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 2.665 2.672 dbt_tas_replicate 909 15.6 0.690 0.883 2.500 2.595 dbt_tas_communicate_buffer 1825 16.3 0.066 0.080 2.278 2.536 dbt_reserve_blocks_index 5398 15.2 0.130 0.152 2.226 2.535 dbt_reserve_blocks_index_array 5363 14.2 0.015 0.018 2.221 2.531 build_3c_derivatives 9 9.0 0.252 0.384 2.325 2.332 dbt_communicate_buffer 1256 14.5 0.048 0.061 2.006 2.141 mp_alltoall_i 4339 15.3 1.796 2.002 1.796 2.002 dbt_tas_reshape 916 14.4 0.010 0.012 1.851 1.956 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.002 0.002 1.854 1.869 convert_to_new_pgrid 4446 16.0 0.046 0.052 1.621 1.801 dbm_copy 3043 16.9 1.575 1.758 1.575 1.758 mp_sum_l 38201 15.3 1.370 1.668 1.370 1.668 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=58.656000000000006, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=165.306, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=33.847, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_merge", label="dbt_tas_merge", y=12.837, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=12.609, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=11.785, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=7.839, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=21.534999999999997, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=19.831, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.651, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_merge", label="dbt_tas_merge", y=1.751, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.763, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=3.421, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.442, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=15.41, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 187.540 187.540 qs_energies 1 2.0 0.000 0.000 187.351 187.351 mp2_main 1 3.0 0.000 0.000 181.919 181.919 mp2_gpw_main 1 4.0 0.001 0.001 181.454 181.454 mp2_ri_gpw_compute_in 1 5.0 0.558 0.558 130.697 130.697 mp2_ri_gpw_compute_in_loop 1 6.0 0.013 0.013 117.888 117.888 mp2_eri_3c_integrate_gpw 2656 7.0 0.016 0.016 86.626 86.626 integrate_v_rspace 2666 8.0 0.750 0.750 72.328 72.328 grid_integrate_task_list 2666 9.0 69.398 69.398 69.398 69.398 mp2_ri_gpw_compute_en 1 5.0 0.074 0.074 50.730 50.730 mp2_ri_gpw_compute_en_RI_loop 1 6.0 11.301 11.301 48.623 48.623 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.274 2.274 28.842 28.842 offload_gemm 2080 8.0 26.568 26.568 26.568 26.568 dbcsr_multiply_generic 5322 8.0 0.203 0.203 23.765 23.765 ao_to_mo_and_store_B_mult_1 2656 7.0 0.012 0.012 23.745 23.745 calculate_wavefunction 2656 8.0 8.254 8.254 12.974 12.974 multiply_cannon 5322 9.0 0.500 0.500 12.417 12.417 pw_transfer 63872 10.6 0.986 0.986 12.362 12.362 get_2c_integrals 1 6.0 0.000 0.000 12.243 12.243 fft_wrap_pw1pw2 53228 11.4 0.116 0.116 11.121 11.121 multiply_cannon_loop 5322 10.0 0.397 0.397 10.908 10.908 compute_2c_integrals 1 7.0 0.006 0.006 10.663 10.663 compute_2c_integrals_loop_lm 1 8.0 0.009 0.009 10.640 10.640 mp2_eri_2c_integrate_gpw 1 9.0 3.122 3.122 10.631 10.631 make_m2s 10644 9.0 0.069 0.069 8.974 8.974 multiply_cannon_multrec 5322 11.0 8.756 8.756 8.800 8.800 make_images 10644 10.0 3.450 3.450 8.597 8.597 fft_wrap_pw1pw2_20 21271 12.4 0.593 0.593 7.891 7.891 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 3.019 3.019 7.393 7.393 fft3d_s 53229 13.4 6.906 6.906 6.947 6.947 mp2_ri_gpw_compute_en_ener 2080 7.0 5.986 5.986 5.986 5.986 scf_env_do_scf 1 3.0 0.000 0.000 5.022 5.022 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 5.022 5.022 copy_dbcsr_to_fm 2679 8.0 0.031 0.031 4.923 4.923 potential_pw2rs 5322 10.0 0.147 0.147 4.280 4.280 collocate_single_gaussian 2656 10.0 0.149 0.149 3.779 3.779 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.021 45.958 45.968 qs_energies 1 2.0 0.000 0.000 45.881 45.882 mp2_main 1 3.0 0.001 0.011 43.309 43.309 mp2_gpw_main 1 4.0 0.002 0.003 43.172 43.172 mp2_ri_gpw_compute_in 1 5.0 0.044 0.045 19.943 25.890 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.005 18.378 24.328 mp2_ri_gpw_compute_en 1 5.0 0.081 0.088 23.145 23.745 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 15.843 21.867 integrate_v_rspace 93 8.1 0.134 0.144 15.746 21.643 grid_integrate_task_list 93 9.1 15.307 21.178 15.307 21.178 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.772 1.025 16.073 16.163 mp2_ri_gpw_compute_en_expansio 65 7.0 0.097 0.122 11.996 12.308 offload_gemm 65 8.0 11.899 12.201 11.899 12.201 mp_min_d 2 7.0 6.025 6.750 6.025 6.750 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 5.950 6.550 mp2_ri_gpw_compute_en_comm 17 7.0 0.107 0.176 2.887 3.299 mp_sendrecv_dm3 510 8.0 2.246 2.687 2.246 2.687 dbcsr_multiply_generic 176 8.0 0.010 0.015 2.185 2.440 scf_env_do_scf 1 3.0 0.000 0.000 2.428 2.429 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.428 2.428 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.165 2.416 get_2c_integrals 1 6.0 0.000 0.000 1.494 1.518 multiply_cannon 176 9.0 0.018 0.028 1.207 1.317 qs_scf_new_mos 10 5.0 0.000 0.000 1.222 1.306 multiply_cannon_loop 176 10.0 0.002 0.003 1.140 1.248 compute_2c_integrals 1 7.0 0.002 0.005 1.168 1.187 eigensolver 11 5.8 0.001 0.001 1.182 1.184 compute_2c_integrals_loop_lm 1 8.0 0.001 0.002 0.838 1.074 make_m2s 352 9.0 0.004 0.004 0.932 1.073 mp2_eri_2c_integrate_gpw 1 9.0 0.223 0.356 0.836 1.073 make_images 352 10.0 0.058 0.068 0.918 1.058 multiply_cannon_multrec 246 11.0 0.954 1.015 0.960 1.021 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.992 0.993 cp_fm_redistribute_end 11 7.8 0.377 0.981 0.391 0.984 pw_transfer 2120 10.5 0.044 0.055 0.854 0.945 cp_fm_diag_elpa_base 11 7.8 0.576 0.927 0.589 0.945 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=63.26299999999999, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=69.398, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=26.568, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=11.301, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.756, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=8.254, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=8.754999999999995, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.307, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=11.899, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.772, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.954, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=6.025, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=2.246, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.083 0.083 136.649 136.649 qs_energies 1 2.0 0.000 0.000 135.221 135.221 scf_env_do_scf 1 3.0 0.000 0.000 128.185 128.185 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 128.185 128.185 qs_ks_update_qs_env 15 5.0 0.000 0.000 53.631 53.631 rebuild_ks_matrix 15 6.0 0.000 0.000 53.401 53.401 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 53.401 53.401 qs_scf_new_mos 15 5.0 0.000 0.000 47.122 47.122 eigensolver 15 6.0 0.002 0.002 38.749 38.749 qs_vxc_create 15 8.0 0.035 0.035 36.506 36.506 calculate_dispersion_nonloc 15 9.0 7.284 7.284 31.648 31.648 cp_fm_diag_elpa 15 7.0 0.000 0.000 24.923 24.923 cp_fm_diag_elpa_base 15 8.0 22.366 22.366 24.923 24.923 pw_transfer 1191 10.0 0.059 0.059 24.467 24.467 fft_wrap_pw1pw2 1086 11.0 0.010 0.010 24.264 24.264 qs_rho_update_rho_low 16 5.0 0.000 0.000 24.053 24.053 calculate_rho_elec 16 6.0 0.219 0.219 24.053 24.053 grid_collocate_task_list 16 7.0 22.625 22.625 22.625 22.625 fft_wrap_pw1pw2_150 765 12.0 3.379 3.379 17.651 17.651 sum_up_and_integrate 15 8.0 0.050 0.050 15.433 15.433 integrate_v_rspace 15 9.0 0.023 0.023 15.383 15.383 grid_integrate_task_list 15 10.0 14.770 14.770 14.770 14.770 cp_fm_cholesky_restore 45 7.0 11.568 11.568 11.568 11.568 fft3d_s 1087 13.0 11.105 11.105 11.112 11.112 pw_scatter_s 585 13.1 7.514 7.514 7.514 7.514 fft_wrap_pw1pw2_200 197 12.3 0.718 0.718 6.416 6.416 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.936 5.936 dbcsr_complete_redistribute 46 8.3 2.426 2.426 5.846 5.846 vdW_energy 15 10.0 4.823 4.823 4.823 4.823 xc_vxc_pw_create 15 9.0 0.220 0.220 4.823 4.823 cp_fm_upper_to_full 30 8.0 4.812 4.812 4.812 4.812 gspace_mixing 14 5.0 0.171 0.171 4.624 4.624 broyden_mixing 14 6.0 3.937 3.937 3.937 3.937 init_scf_run 1 3.0 0.000 0.000 3.360 3.360 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.228 3.228 xc_pw_derive 90 11.0 0.001 0.001 3.097 3.097 calculate_dm_sparse 15 6.0 0.022 0.022 2.792 2.792 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.033 70.306 70.316 qs_energies 1 2.0 0.001 0.012 69.986 69.987 scf_env_do_scf 1 3.0 0.000 0.001 65.475 65.475 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 65.475 65.475 qs_ks_update_qs_env 15 5.0 0.000 0.000 28.282 28.304 rebuild_ks_matrix 15 6.0 0.000 0.000 28.238 28.259 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.004 28.238 28.259 qs_rho_update_rho_low 16 5.0 0.000 0.000 23.039 23.044 calculate_rho_elec 16 6.0 0.007 0.007 23.039 23.044 grid_collocate_task_list 16 7.0 21.303 21.965 21.303 21.965 sum_up_and_integrate 15 8.0 0.009 0.017 15.798 15.855 integrate_v_rspace 15 9.0 0.001 0.001 15.789 15.846 qs_scf_new_mos 15 5.0 0.000 0.000 14.801 14.923 grid_integrate_task_list 15 10.0 14.389 14.921 14.389 14.921 eigensolver 15 6.0 0.001 0.002 13.614 13.658 qs_vxc_create 15 8.0 0.001 0.001 12.079 12.091 cp_fm_diag_elpa 15 7.0 0.000 0.000 9.855 9.861 cp_fm_diag_elpa_base 15 8.0 9.687 9.724 9.852 9.856 calculate_dispersion_nonloc 15 9.0 1.055 1.940 9.802 9.829 pw_transfer 1191 10.0 0.079 0.090 9.363 9.485 fft_wrap_pw1pw2 1086 11.0 0.012 0.014 9.175 9.317 fft3d_ps 1086 13.0 2.686 3.138 7.281 7.562 fft_wrap_pw1pw2_150 765 12.0 0.314 0.389 6.195 6.277 mp_alltoall_z22v 1086 15.0 3.950 4.497 3.950 4.497 cp_fm_cholesky_restore 45 7.0 3.565 3.629 3.565 3.629 yz_to_x 501 13.9 0.248 0.307 2.769 3.076 fft_wrap_pw1pw2_200 197 12.3 0.236 0.290 2.811 2.873 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.707 2.708 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.360 2.576 xc_vxc_pw_create 15 9.0 0.019 0.025 2.276 2.307 x_to_yz 585 14.1 0.372 0.414 1.801 2.013 rs_pw_transfer 158 9.4 0.002 0.002 1.582 1.827 density_rs2pw 16 7.0 0.001 0.001 1.555 1.784 xc_pw_derive 90 11.0 0.001 0.001 1.567 1.655 vdW_energy 15 10.0 1.531 1.636 1.531 1.636 init_scf_run 1 3.0 0.000 0.001 1.547 1.549 build_core_ppnl 1 5.0 1.389 1.528 1.389 1.528 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.444 1.445 mp_waitany 520 11.3 1.132 1.427 1.132 1.427 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=54.215, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.625, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=22.366, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.77, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.568, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.105, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=17.411999999999992, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.303, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=9.687, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.389, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.565, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.95, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.122 0.122 268.843 268.843 qs_energies 1 2.0 0.000 0.000 268.555 268.555 ls_scf 1 3.0 0.000 0.000 267.158 267.158 ls_scf_main 1 4.0 0.002 0.002 257.086 257.086 density_matrix_trs4 11 5.0 0.013 0.013 159.294 159.294 ls_scf_dm_to_ks 11 5.0 0.000 0.000 92.273 92.273 matrix_ls_to_qs 11 6.0 0.000 0.000 88.948 88.948 dbcsr_multiply_generic 185 6.1 0.833 0.833 83.035 83.035 arnoldi_extremal 12 6.1 0.000 0.000 64.140 64.140 arnoldi_normal_ev 12 7.1 0.014 0.014 64.139 64.139 build_subspace 23 8.1 0.081 0.081 62.880 62.880 dbcsr_matrix_vector_mult 652 9.0 0.184 0.184 62.696 62.696 dbcsr_matrix_vector_mult_local 652 10.0 61.256 61.256 61.265 61.265 multiply_cannon 185 7.1 0.331 0.331 51.231 51.231 dbcsr_copy_into_existing 11 7.0 48.249 48.249 48.249 48.249 dbcsr_complete_redistribute 23 7.5 31.822 31.822 44.658 44.658 matrix_decluster 11 7.0 0.000 0.000 40.698 40.698 multiply_cannon_loop 185 8.1 0.338 0.338 38.656 38.656 multiply_cannon_multrec 185 9.1 28.463 28.463 28.545 28.545 make_m2s 370 7.1 0.039 0.039 26.988 26.988 make_images 370 8.1 11.187 11.187 25.048 25.048 dbcsr_finalize 646 7.5 0.216 0.216 16.937 16.937 dbcsr_merge_all 597 8.5 2.618 2.618 15.771 15.771 setup_rec_index_2d 370 8.1 12.133 12.133 12.133 12.133 tree_to_linear_d 110 9.4 11.732 11.732 11.732 11.732 dbcsr_sort_indices 1103 9.9 10.504 10.504 10.504 10.504 calculate_norms 370 9.1 9.773 9.773 9.773 9.773 ls_scf_init_scf 1 4.0 0.000 0.000 9.316 9.316 quick_finalize 395 10.0 0.431 0.431 9.113 9.113 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.912 8.912 dbcsr_special_finalize 370 9.1 0.002 0.002 8.419 8.419 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 8.232 8.232 matrix_qs_to_ls 12 5.1 0.000 0.000 5.729 5.729 matrix_cluster 12 6.1 0.000 0.000 5.729 5.729 dbcsr_dot_sd 144 6.3 5.465 5.465 5.467 5.467 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.032 83.956 83.968 qs_energies 1 2.0 0.000 0.000 83.850 83.850 ls_scf 1 3.0 0.000 0.000 83.754 83.755 ls_scf_main 1 4.0 0.001 0.009 80.606 80.607 density_matrix_trs4 11 5.0 0.007 0.022 77.584 77.665 dbcsr_multiply_generic 185 6.1 0.070 0.086 72.587 72.864 multiply_cannon 185 7.1 0.035 0.042 59.892 60.889 multiply_cannon_loop 185 8.1 0.129 0.148 56.995 57.860 multiply_cannon_multrec 1480 9.1 34.400 37.564 34.711 37.893 mp_waitall_1 11936 10.3 19.387 21.946 19.387 21.946 multiply_cannon_metrocomm3 1480 9.1 0.014 0.016 11.518 16.407 make_m2s 370 7.1 0.037 0.039 8.222 8.310 make_images 370 8.1 0.668 0.722 8.083 8.173 multiply_cannon_metrocomm1 1480 9.1 0.008 0.010 4.498 7.936 calculate_norms 2960 9.1 6.062 7.764 6.062 7.764 mp_sum_l 1119 5.6 3.484 4.324 3.484 4.324 make_images_data 370 9.1 0.009 0.011 3.667 3.981 arnoldi_extremal 12 6.1 0.000 0.000 3.779 3.813 arnoldi_normal_ev 12 7.1 0.001 0.004 3.778 3.812 build_subspace 23 8.1 0.024 0.031 3.622 3.624 hybrid_alltoall_any 393 9.9 0.238 1.175 3.165 3.368 dbcsr_matrix_vector_mult 652 9.0 0.010 0.049 2.662 3.290 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 2.440 3.155 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.618 2.684 ls_scf_init_scf 1 4.0 0.000 0.000 2.459 2.463 dbcsr_matrix_vector_mult_local 652 10.0 1.912 2.439 1.914 2.441 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.432 2.438 dbcsr_complete_redistribute 23 7.5 1.323 1.543 2.233 2.326 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.002 2.229 2.231 matrix_ls_to_qs 11 6.0 0.000 0.000 2.132 2.229 make_images_pack 370 9.1 1.957 2.225 1.960 2.228 matrix_decluster 11 7.0 0.000 0.000 1.980 2.059 buffer_matrices_ensure_size 370 8.1 1.634 1.946 1.634 1.946 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=77.14700000000002, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=61.256, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=48.249, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=31.822, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=28.463, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=12.133, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.773, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=15.430999999999997, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.912, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.323, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=34.4, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=6.062, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.957, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.484, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=19.387, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 75.901 75.901 lib_test 1 2.0 0.000 0.000 75.878 75.878 dbcsr_run_tests 3 3.0 0.002 0.002 75.878 75.878 test_multiplies_multiproc 3 4.0 0.002 0.002 59.551 59.551 dbcsr_redistribute 9 5.0 37.244 37.244 38.885 38.885 dbcsr_multiply_generic 9 5.0 0.001 0.001 18.989 18.989 dbcsr_make_random_matrix 9 4.0 13.103 13.103 16.231 16.231 multiply_cannon 9 6.0 0.002 0.002 14.011 14.011 multiply_cannon_loop 9 7.0 0.041 0.041 13.646 13.646 multiply_cannon_multrec 9 8.0 13.604 13.604 13.605 13.605 dbcsr_finalize 27 5.7 0.032 0.032 5.868 5.868 dbcsr_merge_all 18 6.5 2.228 2.228 5.167 5.167 dbcsr_data_release 975 7.6 2.529 2.529 2.529 2.529 tree_to_linear_d 9 7.0 2.023 2.023 2.023 2.023 make_m2s 18 6.0 0.001 0.001 1.664 1.664 make_images 18 7.0 0.577 0.577 1.611 1.611 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.017 21.769 21.774 lib_test 1 2.0 0.000 0.000 21.718 21.737 dbcsr_run_tests 3 3.0 0.001 0.001 21.716 21.736 test_multiplies_multiproc 3 4.0 0.000 0.003 20.776 20.840 dbcsr_multiply_generic 9 5.0 0.001 0.001 18.738 18.796 multiply_cannon 9 6.0 0.002 0.002 16.725 17.059 multiply_cannon_loop 9 7.0 0.002 0.004 16.398 16.753 multiply_cannon_multrec 72 8.0 13.489 14.811 13.490 14.812 mp_waitall_1 576 9.2 3.280 4.083 3.280 4.083 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 2.565 3.333 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.335 1.140 dbcsr_make_random_matrix 9 4.0 0.728 1.000 0.910 1.133 mp_sum_l 390 2.5 0.603 1.054 0.603 1.054 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.586 1.035 make_m2s 18 6.0 0.001 0.001 0.793 0.852 make_images 18 7.0 0.023 0.029 0.790 0.848 dbcsr_finalize 27 5.7 0.000 0.000 0.693 0.795 dbcsr_data_release 444 7.6 0.614 0.721 0.614 0.721 dbcsr_merge_all 18 6.5 0.108 0.145 0.599 0.675 dbcsr_destroy 111 5.9 0.000 0.001 0.523 0.611 dbcsr_checksum 6 5.0 0.180 0.594 0.596 0.596 dbcsr_redistribute 9 5.0 0.274 0.320 0.515 0.547 make_images_data 18 8.0 0.000 0.001 0.421 0.495 mp_sum_d 191 1.2 0.418 0.445 0.418 0.445 hybrid_alltoall_any 18 9.0 0.037 0.176 0.377 0.437 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=7.192999999999998, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=37.244, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=13.604, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=13.103, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.529, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.228, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.6729999999999947, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.274, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=13.489, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.728, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.614, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.108, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.28, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.603, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.047 0.047 138.595 138.595 qs_mol_dyn_low 1 2.0 0.004 0.004 137.184 137.184 velocity_verlet 5 3.0 0.003 0.003 112.061 112.061 qmmm_el_coupling 6 3.8 0.000 0.000 88.773 88.773 qmmm_elec_with_gaussian 6 4.8 0.101 0.101 88.768 88.768 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 87.905 87.905 qmmm_elec_gaussian_low_G 6 6.8 86.955 86.955 86.955 86.955 qs_forces 6 3.8 0.001 0.001 39.283 39.283 qs_energies 6 4.8 0.000 0.000 34.985 34.985 scf_env_do_scf 6 5.8 0.001 0.001 32.601 32.601 scf_env_do_scf_inner_loop 39 6.8 0.005 0.005 28.223 28.223 rebuild_ks_matrix 45 8.4 0.000 0.000 27.102 27.102 qs_ks_build_kohn_sham_matrix 45 9.4 0.006 0.006 27.102 27.102 qs_ks_update_qs_env 45 7.8 0.000 0.000 23.251 23.251 pw_transfer 966 12.3 0.058 0.058 18.674 18.674 fft_wrap_pw1pw2 801 13.6 0.007 0.007 18.385 18.385 fft_wrap_pw1pw2_150 507 15.2 2.199 2.199 17.890 17.890 qs_vxc_create 45 10.4 0.001 0.001 14.781 14.781 xc_vxc_pw_create 45 11.4 0.670 0.670 14.780 14.780 xc_pw_derive 270 13.4 0.002 0.002 10.270 10.270 fft3d_s 802 15.6 8.537 8.537 8.565 8.565 qs_rho_update_rho_low 45 7.9 0.000 0.000 8.141 8.141 calculate_rho_elec 45 8.9 0.564 0.564 8.140 8.140 xc_rho_set_and_dset_create 45 12.4 0.762 0.762 7.549 7.549 xc_pw_divergence 45 12.4 0.001 0.001 6.478 6.478 pw_scatter_s 429 15.8 5.862 5.862 5.862 5.862 qmmm_forces 6 3.8 0.001 0.001 5.854 5.854 qmmm_forces_with_gaussian 6 4.8 0.103 0.103 5.483 5.483 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.613 4.613 pw_integral_ab 2539 7.4 4.556 4.556 4.556 4.556 init_scf_loop 6 6.8 0.000 0.000 4.373 4.373 qs_ks_ddapc 45 10.4 0.001 0.001 4.358 4.358 grid_collocate_task_list 45 9.9 3.890 3.890 3.890 3.890 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.859 3.859 qmmm_forces_gaussian_low_G 6 6.8 3.834 3.834 3.834 3.834 density_rs2pw 45 9.9 0.002 0.002 3.686 3.686 sum_up_and_integrate 45 10.4 0.160 0.160 3.670 3.670 integrate_v_rspace 45 11.4 0.011 0.011 3.510 3.510 pw_poisson_solve 51 9.9 1.352 1.352 3.163 3.163 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.058 69.781 69.795 qs_mol_dyn_low 1 2.0 0.004 0.006 68.466 68.529 qs_forces 6 3.8 0.001 0.001 49.833 49.833 qs_energies 6 4.8 0.000 0.001 47.397 47.397 scf_env_do_scf 6 5.8 0.000 0.001 46.220 46.220 scf_env_do_scf_inner_loop 113 6.2 0.003 0.020 44.142 44.146 rebuild_ks_matrix 119 8.1 0.000 0.000 33.038 33.050 qs_ks_build_kohn_sham_matrix 119 9.1 0.017 0.027 33.038 33.050 qs_ks_update_qs_env 119 7.3 0.001 0.001 30.929 30.944 velocity_verlet 5 3.0 0.002 0.004 28.664 28.668 pw_transfer 2446 12.3 0.182 0.198 21.286 21.881 fft_wrap_pw1pw2 2059 13.4 0.023 0.025 20.847 21.467 fft_wrap_pw1pw2_150 1321 14.9 1.602 1.938 19.822 20.412 qs_vxc_create 119 10.1 0.003 0.004 16.931 16.940 xc_vxc_pw_create 119 11.1 0.176 0.247 16.928 16.938 fft3d_ps 2059 15.4 7.806 8.959 16.056 16.924 xc_pw_derive 714 13.1 0.008 0.010 12.795 13.139 qs_rho_update_rho_low 119 7.3 0.001 0.001 13.092 13.102 calculate_rho_elec 119 8.3 0.051 0.057 13.091 13.102 sum_up_and_integrate 119 10.1 0.071 0.102 11.756 11.851 integrate_v_rspace 119 11.1 0.004 0.004 11.685 11.794 rs_pw_transfer 988 11.5 0.013 0.016 9.275 9.638 qmmm_forces 6 3.8 0.002 0.003 9.467 9.468 qmmm_forces_with_gaussian 6 4.8 0.342 0.432 8.520 9.258 xc_rho_set_and_dset_create 119 12.1 0.419 0.804 7.866 9.023 xc_pw_divergence 119 12.1 0.004 0.005 8.527 8.809 qmmm_el_coupling 6 3.8 0.000 0.000 7.967 8.204 qmmm_elec_with_gaussian 6 4.8 0.373 0.459 7.965 8.202 density_rs2pw 119 9.3 0.006 0.007 7.828 8.186 mp_alltoall_z22v 2059 17.4 6.626 7.742 6.626 7.742 potential_pw2rs 119 12.1 0.006 0.008 7.285 7.309 grid_collocate_task_list 119 9.3 4.980 5.285 4.980 5.285 x_to_yz 1095 16.8 0.945 1.064 4.261 4.694 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.364 4.601 yz_to_x 964 16.0 0.627 0.745 3.936 4.586 mp_waitany 4028 12.8 3.636 4.291 3.636 4.291 grid_integrate_task_list 119 12.1 3.844 4.241 3.844 4.241 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.714 4.011 qmmm_forces_gaussian_low_G 6 6.8 3.590 3.843 3.590 3.843 rs_pw_transfer_PW2RS_150 125 13.9 1.380 1.774 3.711 3.788 pw_restrict_s3 18 5.8 1.526 1.878 2.989 3.444 qmmm_elec_gaussian_low_G 6 6.8 3.087 3.370 3.087 3.370 rs_pw_transfer_RS2PW_150 125 11.2 1.020 1.287 2.992 3.347 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.438 2.658 pw_prolongate_s3 18 6.8 1.280 1.497 2.438 2.658 mp_waitall_1 188862 16.2 2.338 2.571 2.338 2.571 dbcsr_multiply_generic 2588 12.3 0.063 0.079 2.290 2.454 qs_scf_new_mos 113 7.2 0.001 0.001 2.375 2.382 qs_scf_loop_do_ot 113 8.2 0.000 0.001 2.374 2.381 qs_ks_ddapc 119 10.1 0.002 0.003 2.221 2.325 mp_sum_dm3 33 5.7 2.131 2.310 2.131 2.310 ot_scf_mini 113 9.2 0.001 0.001 2.275 2.280 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.132 2.133 init_scf_loop 6 6.8 0.000 0.001 2.075 2.082 pw_integral_ab 2761 7.7 1.279 1.420 1.853 1.963 mp_sum_d 5820 12.2 1.516 1.906 1.516 1.906 pw_gather_p 964 15.0 1.451 1.826 1.451 1.826 pw_scatter_p 1095 15.8 1.678 1.730 1.678 1.730 xc_functional_eval 238 13.1 0.003 0.004 0.785 1.477 pw_copy 1670 12.6 1.187 1.443 1.187 1.443 ot_mini 113 10.2 0.001 0.004 1.390 1.397 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=28.795, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=86.955, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.537, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.862, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.556, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.89, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=38.523, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=3.087, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.279, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.98, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.806, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.844, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.636, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=6.626, yerr=0.0 Summary: Performance test took 38 minutes. Status: OK Removing intermediate container 54cd600670eb ---> ffcdfcc2a2e8 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in c2514fe9682a Removing intermediate container c2514fe9682a ---> 74e35d5b4cc1 Step 42/42 : ENTRYPOINT [] ---> Running in 1da75d7b775c Removing intermediate container 1da75d7b775c ---> 45766be44f02 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 45766be44f02 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-08-17 19:46:50+00:00