StartDate: 2022-09-10 11:11:07+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: d348ce2f59b4a117d29a1714b1b5b3cce01a406d CommitTime: 2022-09-10 09:54:46 +0200 CommitAuthor: Frederick Stein CommitSubject: Refactoring of the fm module (#2277) Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=d348ce2f59b4a117d29a1714b1b5b3cce01a406d Sending build context to Docker daemon 364.3MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 2b55860d4c66: Already exists Digest: sha256:20fa2d7bb4de7723f542be5923b06c4d704370f0390e4ae9e1c833c8785644c1 Status: Downloaded newer image for ubuntu:22.04 ---> 2dc39ba059dc Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> bf0c853ea628 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 3f014c28f167 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 9e0fed4f46ea Step 5/42 : RUN mkdir scripts ---> Using cache ---> 51c38e746560 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 2dae48987405 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 0866f6db4185 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> 31d8b226d31b Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 97d8aeb6f496 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 8aeabfd5037c Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 8618a3774453 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> b40da73828df Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 3865622e9bc2 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> b1774297145c Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> cc84a0fe72f2 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 741c87e87f58 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 9530916da9b2 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> caa7753c160d Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> ce75453dd733 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 442ef0fc5d20 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 36befdbe1182 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 9a666e8026db Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 8cb1bf2b0d2d Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 7356e3cd4224 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> ea807d1d7e9f Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 74dbb93b0cd1 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 0d39cf40621f Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 83be2aba63c6 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> bdfeedf6f8e6 Step 30/42 : COPY ./Makefile . ---> Using cache ---> 197c256193cb Step 31/42 : COPY ./src ./src ---> a1c968c38c94 Step 32/42 : COPY ./exts ./exts ---> 87ac386415a1 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> 6f5e1e863054 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in e37566028e75 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container e37566028e75 ---> b28b7fc54636 Step 35/42 : COPY ./data ./data ---> e26190ce68eb Step 36/42 : COPY ./tests ./tests ---> a78681853313 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> b34aaa14a7ec Step 38/42 : COPY ./benchmarks ./benchmarks ---> a7e89e5d520e Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> bde934a3a9f5 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in e6c9cb884919 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.036 0.036 87.406 87.406 qs_mol_dyn_low 1 2.0 0.003 0.003 86.800 86.800 qs_forces 11 3.9 0.001 0.001 86.753 86.753 qs_energies 11 4.9 0.001 0.001 80.560 80.560 scf_env_do_scf 11 5.9 0.001 0.001 69.120 69.120 velocity_verlet 10 3.0 0.002 0.002 57.380 57.380 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 52.436 52.436 rebuild_ks_matrix 119 8.3 0.001 0.001 20.054 20.054 qs_ks_build_kohn_sham_matrix 119 9.3 0.012 0.012 20.053 20.053 dbcsr_multiply_generic 2286 12.5 0.160 0.160 19.431 19.431 qs_scf_new_mos 108 7.5 0.001 0.001 19.224 19.224 qs_scf_loop_do_ot 108 8.5 0.001 0.001 19.223 19.223 qs_rho_update_rho_low 119 7.7 0.001 0.001 18.620 18.620 calculate_rho_elec 119 8.7 0.944 0.944 18.620 18.620 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.301 18.301 ot_scf_mini 108 9.5 0.002 0.002 17.955 17.955 init_scf_loop 11 6.9 0.000 0.000 16.552 16.552 grid_collocate_task_list 119 9.7 14.478 14.478 14.478 14.478 prepare_preconditioner 11 7.9 0.000 0.000 14.077 14.077 make_preconditioner 11 8.9 0.000 0.000 14.077 14.077 make_full_inverse_cholesky 11 9.9 0.000 0.000 12.857 12.857 sum_up_and_integrate 119 10.3 0.183 0.183 12.400 12.400 integrate_v_rspace 119 11.3 0.108 0.108 12.217 12.217 ot_mini 108 10.5 0.001 0.001 11.715 11.715 make_m2s 4572 13.5 0.047 0.047 10.982 10.982 grid_integrate_task_list 119 12.3 10.334 10.334 10.334 10.334 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.994 6.994 qs_ot_get_derivative 108 11.5 0.001 0.001 6.128 6.128 dbcsr_make_dense_low 5837 15.5 0.066 0.066 5.869 5.869 make_dense_data 5837 16.5 5.239 5.239 5.789 5.789 pw_transfer 1439 11.6 0.062 0.062 5.731 5.731 ot_diis_step 108 11.5 0.004 0.004 5.583 5.583 make_images 4572 14.5 2.117 2.117 5.525 5.525 fft_wrap_pw1pw2 1201 12.6 0.006 0.006 5.507 5.507 dbcsr_make_images_dense 3978 14.8 0.018 0.018 5.139 5.139 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.119 5.119 apply_single 119 13.6 0.000 0.000 5.118 5.118 fft_wrap_pw1pw2_140 487 13.2 0.439 0.439 4.716 4.716 cp_fm_cholesky_decompose 22 10.9 4.681 4.681 4.681 4.681 multiply_cannon 2286 13.5 0.173 0.173 4.458 4.458 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.003 4.003 multiply_cannon_loop 2286 14.5 0.030 0.030 4.001 4.001 cp_fm_cholesky_invert 11 10.9 3.990 3.990 3.990 3.990 multiply_cannon_multrec 2286 15.5 3.922 3.922 3.970 3.970 init_scf_run 11 5.9 0.002 0.002 3.840 3.840 scf_env_initial_rho_setup 11 6.9 0.001 0.001 3.838 3.838 dbcsr_complete_redistribute 329 12.2 2.000 2.000 3.733 3.733 qs_create_task_list 11 7.9 0.000 0.000 3.619 3.619 generate_qs_task_list 11 8.9 2.615 2.615 3.619 3.619 dbcsr_copy 2102 12.0 0.185 0.185 3.576 3.576 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.508 3.508 wfi_extrapolate 11 7.9 0.001 0.001 3.370 3.370 dbcsr_copy_into_existing 22 7.9 3.360 3.360 3.361 3.361 density_rs2pw 119 9.7 0.004 0.004 3.198 3.198 copy_dbcsr_to_fm 153 11.3 0.002 0.002 3.014 3.014 qs_ot_get_p 119 10.4 0.001 0.001 2.858 2.858 fft3d_s 1202 14.6 2.709 2.709 2.714 2.714 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.682 2.682 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.546 2.546 pw_poisson_solve 119 10.3 0.883 0.883 2.360 2.360 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.349 2.349 dbcsr_data_release 279534 16.0 2.224 2.224 2.224 2.224 copy_fm_to_dbcsr 176 11.2 0.001 0.001 1.961 1.961 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.937 1.937 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 1.871 1.871 potential_pw2rs 119 12.3 0.044 0.044 1.775 1.775 cp_fm_upper_to_full 72 14.2 1.748 1.748 1.748 1.748 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.022 46.841 46.851 qs_mol_dyn_low 1 2.0 0.003 0.004 46.727 46.731 qs_forces 11 3.9 0.001 0.001 46.687 46.687 qs_energies 11 4.9 0.001 0.001 43.458 43.460 scf_env_do_scf 11 5.9 0.000 0.002 39.878 39.879 scf_env_do_scf_inner_loop 108 6.5 0.002 0.017 36.837 36.837 velocity_verlet 10 3.0 0.001 0.003 27.858 27.859 rebuild_ks_matrix 119 8.3 0.000 0.001 17.882 18.064 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.017 17.882 18.063 qs_ks_update_qs_env 119 7.6 0.001 0.001 15.931 16.097 sum_up_and_integrate 119 10.3 0.016 0.020 13.726 13.755 integrate_v_rspace 119 11.3 0.004 0.005 13.710 13.740 qs_rho_update_rho_low 119 7.7 0.001 0.001 13.480 13.492 calculate_rho_elec 119 8.7 0.029 0.032 13.480 13.491 dbcsr_multiply_generic 2286 12.5 0.068 0.077 12.998 13.275 qs_scf_new_mos 108 7.5 0.001 0.001 10.294 10.503 qs_scf_loop_do_ot 108 8.5 0.001 0.001 10.293 10.502 grid_collocate_task_list 119 9.7 8.955 10.378 8.955 10.378 grid_integrate_task_list 119 12.3 8.049 10.335 8.049 10.335 multiply_cannon 2286 13.5 0.121 0.145 9.611 10.285 ot_scf_mini 108 9.5 0.002 0.003 9.684 9.878 multiply_cannon_loop 2286 14.5 0.089 0.107 9.136 9.840 mp_waitall_1 169478 16.3 8.322 9.197 8.322 9.197 multiply_cannon_metrocomm3 18288 15.5 0.037 0.045 5.492 6.240 ot_mini 108 10.5 0.001 0.001 5.648 5.861 rs_pw_transfer 974 11.9 0.010 0.014 5.312 5.681 density_rs2pw 119 9.7 0.005 0.006 4.194 4.550 potential_pw2rs 119 12.3 0.006 0.008 3.251 3.279 multiply_cannon_multrec 18288 15.5 2.806 3.169 2.815 3.179 pw_transfer 1439 11.6 0.081 0.103 3.065 3.121 qs_ot_get_derivative 108 11.5 0.001 0.001 2.923 3.117 init_scf_loop 11 6.9 0.000 0.000 3.029 3.029 fft_wrap_pw1pw2 1201 12.6 0.008 0.010 2.924 2.969 mp_alltoall_d11v 2130 13.8 2.587 2.921 2.587 2.921 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.615 2.790 apply_single 119 13.6 0.000 0.000 2.615 2.790 mp_waitany 9880 13.7 2.199 2.775 2.199 2.775 rs_gather_matrices 119 12.3 0.077 0.087 2.375 2.725 ot_diis_step 108 11.5 0.003 0.004 2.703 2.703 fft_wrap_pw1pw2_140 487 13.2 0.239 0.288 2.485 2.599 init_scf_run 11 5.9 0.000 0.004 2.458 2.458 scf_env_initial_rho_setup 11 6.9 0.000 0.003 2.458 2.458 make_m2s 4572 13.5 0.044 0.054 2.229 2.308 rs_pw_transfer_RS2PW_140 130 11.5 0.250 0.321 1.918 2.281 wfi_extrapolate 11 7.9 0.001 0.001 2.231 2.231 fft3d_ps 1201 14.6 1.119 1.239 2.083 2.172 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.085 2.101 make_images 4572 14.5 0.114 0.133 1.916 1.969 qs_ot_get_p 119 10.4 0.001 0.001 1.249 1.497 multiply_cannon_metrocomm1 18288 15.5 0.018 0.023 0.545 1.492 rs_pw_transfer_PW2RS_140 130 13.9 0.516 0.653 1.344 1.427 mp_sum_l 11218 13.2 0.739 1.378 0.739 1.378 mp_sum_d 4129 12.0 0.882 1.291 0.882 1.291 make_images_data 4572 15.5 0.034 0.040 1.058 1.216 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.859 1.116 prepare_preconditioner 11 7.9 0.000 0.000 1.082 1.108 make_preconditioner 11 8.9 0.000 0.000 1.082 1.108 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.964 1.062 hybrid_alltoall_any 4725 16.4 0.056 0.168 0.906 1.034 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.913 1.009 make_full_inverse_cholesky 11 9.9 0.000 0.000 0.977 1.004 mp_alltoall_z22v 1201 16.6 0.734 0.966 0.734 0.966 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 0.948 0.948 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=44.76200000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=14.478, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.334, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.239, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.681, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=3.99, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.922, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=16.122000000000003, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=8.955, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.049, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.806, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.322, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=2.587, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.029 0.029 109.311 109.311 qs_mol_dyn_low 1 2.0 0.003 0.003 108.677 108.677 qs_forces 11 3.9 0.001 0.001 108.638 108.638 qs_energies 11 4.9 0.001 0.001 100.907 100.907 scf_env_do_scf 11 5.9 0.001 0.001 88.382 88.382 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 70.620 70.620 velocity_verlet 10 3.0 0.002 0.002 70.279 70.279 rebuild_ks_matrix 107 8.3 0.001 0.001 33.175 33.175 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 33.175 33.175 qs_rho_update_rho_low 107 7.7 0.000 0.000 30.869 30.869 calculate_rho_elec 107 8.7 0.850 0.850 30.868 30.868 qs_ks_update_qs_env 107 7.6 0.001 0.001 29.799 29.799 grid_collocate_task_list 107 9.7 26.969 26.969 26.969 26.969 sum_up_and_integrate 107 10.3 0.164 0.164 26.335 26.335 integrate_v_rspace 107 11.3 0.087 0.087 26.170 26.170 grid_integrate_task_list 107 12.3 24.413 24.413 24.413 24.413 init_scf_loop 11 6.9 0.000 0.000 17.629 17.629 dbcsr_multiply_generic 1966 12.4 0.139 0.139 16.932 16.932 qs_scf_new_mos 96 7.5 0.000 0.000 16.644 16.644 qs_scf_loop_do_ot 96 8.5 0.001 0.001 16.643 16.643 ot_scf_mini 96 9.5 0.002 0.002 15.211 15.211 prepare_preconditioner 11 7.9 0.000 0.000 13.568 13.568 make_preconditioner 11 8.9 0.000 0.000 13.568 13.568 make_full_inverse_cholesky 11 9.9 0.000 0.000 12.405 12.405 ot_mini 96 10.5 0.001 0.001 9.885 9.885 make_m2s 3932 13.4 0.039 0.039 9.350 9.350 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.644 6.644 pw_transfer 1295 11.6 0.053 0.053 5.386 5.386 qs_ot_get_derivative 96 11.5 0.001 0.001 5.293 5.293 fft_wrap_pw1pw2 1081 12.6 0.005 0.005 5.202 5.202 init_scf_run 11 5.9 0.002 0.002 5.177 5.177 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.175 5.175 dbcsr_make_dense_low 4961 15.5 0.064 0.064 5.039 5.039 make_dense_data 4961 16.5 4.454 4.454 4.964 4.964 make_images 3932 14.4 1.875 1.875 4.651 4.651 ot_diis_step 96 11.5 0.003 0.003 4.590 4.590 wfi_extrapolate 11 7.9 0.001 0.001 4.567 4.567 fft_wrap_pw1pw2_140 439 13.2 0.462 0.462 4.481 4.481 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.392 4.392 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.258 4.258 apply_single 107 13.6 0.000 0.000 4.258 4.258 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.239 4.239 cp_fm_cholesky_decompose 22 10.9 4.175 4.175 4.175 4.175 multiply_cannon 1966 13.4 0.189 0.189 4.113 4.113 cp_fm_cholesky_invert 11 10.9 3.974 3.974 3.974 3.974 dbcsr_complete_redistribute 317 12.2 1.874 1.874 3.706 3.706 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.659 3.659 multiply_cannon_loop 1966 14.4 0.034 0.034 3.618 3.618 multiply_cannon_multrec 1966 15.4 3.542 3.542 3.584 3.584 dbcsr_copy 1855 11.9 0.176 0.176 3.504 3.504 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.490 3.490 qs_create_task_list 11 7.9 0.000 0.000 3.294 3.294 generate_qs_task_list 11 8.9 2.405 2.405 3.294 3.294 dbcsr_copy_into_existing 22 7.9 3.272 3.272 3.273 3.273 density_rs2pw 107 9.7 0.003 0.003 3.049 3.049 copy_dbcsr_to_fm 147 11.2 0.002 0.002 3.047 3.047 build_core_hamiltonian_matrix 11 6.9 0.000 0.000 2.568 2.568 fft3d_s 1082 14.6 2.516 2.516 2.520 2.520 qs_ot_get_p 107 10.4 0.001 0.001 2.440 2.440 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.323 2.323 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.020 80.866 80.877 qs_mol_dyn_low 1 2.0 0.003 0.004 80.766 80.770 qs_forces 11 3.9 0.001 0.001 80.726 80.727 qs_energies 11 4.9 0.001 0.001 75.269 75.270 scf_env_do_scf 11 5.9 0.000 0.002 69.755 69.756 scf_env_do_scf_inner_loop 96 6.5 0.002 0.017 64.653 64.653 velocity_verlet 10 3.0 0.001 0.003 48.080 48.081 rebuild_ks_matrix 107 8.3 0.000 0.000 36.160 36.290 qs_ks_build_kohn_sham_matrix 107 9.3 0.012 0.015 36.160 36.290 sum_up_and_integrate 107 10.3 0.015 0.018 31.937 32.548 integrate_v_rspace 107 11.3 0.004 0.005 31.922 32.535 qs_ks_update_qs_env 107 7.6 0.001 0.001 31.936 32.051 qs_rho_update_rho_low 107 7.7 0.000 0.001 30.870 30.882 calculate_rho_elec 107 8.7 0.027 0.029 30.869 30.881 grid_integrate_task_list 107 12.3 22.879 29.450 22.879 29.450 grid_collocate_task_list 107 9.7 21.888 27.997 21.888 27.997 dbcsr_multiply_generic 1966 12.4 0.060 0.068 11.625 17.534 rs_pw_transfer 878 11.9 0.009 0.012 9.539 10.322 density_rs2pw 107 9.7 0.004 0.005 8.658 9.463 qs_scf_new_mos 96 7.5 0.000 0.001 8.773 9.008 qs_scf_loop_do_ot 96 8.5 0.001 0.001 8.772 9.008 multiply_cannon 1966 13.4 0.109 0.122 7.953 8.516 ot_scf_mini 96 9.5 0.002 0.002 8.243 8.469 multiply_cannon_loop 1966 14.4 0.081 0.096 7.479 8.134 mp_alltoall_d11v 1998 13.7 6.299 7.813 6.299 7.813 mp_waitall_1 146670 16.2 6.566 7.624 6.566 7.624 mp_waitany 8968 13.7 6.783 7.592 6.783 7.592 rs_gather_matrices 107 12.3 0.062 0.075 6.082 7.567 rs_pw_transfer_RS2PW_140 118 11.5 0.264 0.324 6.603 7.405 mp_sum_l 9666 13.1 1.313 7.114 1.313 7.114 multiply_cannon_metrocomm3 15728 15.4 0.033 0.041 4.182 5.292 ot_mini 96 10.5 0.001 0.001 4.862 5.114 init_scf_loop 11 6.9 0.000 0.001 5.090 5.090 init_scf_run 11 5.9 0.000 0.004 4.390 4.390 scf_env_initial_rho_setup 11 6.9 0.000 0.003 4.389 4.390 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.340 4.356 wfi_extrapolate 11 7.9 0.001 0.001 3.998 3.998 pw_transfer 1295 11.6 0.074 0.092 2.948 3.015 potential_pw2rs 107 12.3 0.005 0.006 2.925 2.958 multiply_cannon_multrec 15728 15.4 2.565 2.880 2.574 2.891 fft_wrap_pw1pw2 1081 12.6 0.008 0.009 2.818 2.880 qs_ot_get_derivative 96 11.5 0.001 0.001 2.572 2.800 fft_wrap_pw1pw2_140 439 13.2 0.218 0.261 2.384 2.528 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.176 2.343 apply_single 107 13.6 0.000 0.000 2.175 2.343 ot_diis_step 96 11.5 0.003 0.003 2.266 2.267 fft3d_ps 1081 14.6 1.034 1.149 2.037 2.100 make_m2s 3932 13.4 0.039 0.047 1.981 2.069 make_images 3932 14.4 0.102 0.118 1.698 1.781 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=45.32600000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=26.969, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.413, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.454, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.175, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=3.974, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=16.450999999999993, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.888, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.879, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=6.299, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=6.783, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=6.566, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.179 0.179 110.187 110.187 qs_energies 1 2.0 0.000 0.000 109.392 109.392 scf_env_do_scf 1 3.0 0.000 0.000 108.276 108.276 qs_ks_update_qs_env 8 5.0 0.000 0.000 103.455 103.455 rebuild_ks_matrix 7 6.0 0.000 0.000 103.402 103.402 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 103.402 103.402 hfx_ks_matrix 7 8.0 0.000 0.000 94.171 94.171 integrate_four_center 7 9.0 1.736 1.736 94.155 94.155 integrate_four_center_main 7 10.0 0.575 0.575 81.299 81.299 integrate_four_center_bin 454 11.0 80.724 80.724 80.724 80.724 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 56.963 56.963 init_scf_loop 1 4.0 0.000 0.000 51.301 51.301 integrate_four_center_load 7 10.0 0.000 0.000 10.831 10.831 hfx_load_balance 1 11.0 0.001 0.001 10.831 10.831 hfx_load_balance_count 1 12.0 5.417 5.417 5.417 5.417 hfx_load_balance_bin 1 12.0 5.397 5.397 5.397 5.397 qs_vxc_create 14 8.0 0.000 0.000 3.648 3.648 xc_vxc_pw_create 14 9.0 0.117 0.117 3.648 3.648 xc_rho_set_and_dset_create 14 10.0 0.088 0.088 2.941 2.941 calculate_rho_elec 15 7.4 0.118 0.118 2.373 2.373 xc_functional_eval 35 11.0 0.000 0.000 2.334 2.334 prepare_preconditioner 1 5.0 0.000 0.000 2.325 2.325 make_preconditioner 1 6.0 0.000 0.000 2.325 2.325 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.223 0.242 131.815 131.825 qs_energies 1 2.0 0.000 0.000 131.465 131.471 scf_env_do_scf 1 3.0 0.000 0.000 131.104 131.105 qs_ks_update_qs_env 8 5.0 0.000 0.000 129.085 129.086 rebuild_ks_matrix 7 6.0 0.000 0.000 129.078 129.079 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 129.078 129.079 hfx_ks_matrix 7 8.0 0.000 0.001 123.027 123.029 integrate_four_center 7 9.0 0.053 0.360 123.019 123.021 integrate_four_center_main 7 10.0 0.003 0.004 79.266 111.224 integrate_four_center_bin 448 11.0 79.263 111.220 79.263 111.220 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 74.422 74.423 init_scf_loop 1 4.0 0.000 0.000 56.681 56.681 mp_sync 70 11.3 31.971 34.519 31.971 34.519 integrate_four_center_load 7 10.0 0.000 0.000 11.098 11.102 hfx_load_balance 1 11.0 0.001 0.001 11.098 11.102 mp_sum_l 1135 8.3 5.447 5.706 5.447 5.706 hfx_load_balance_dist 1 12.0 0.000 0.000 5.322 5.578 hfx_load_balance_bin 1 12.0 2.850 5.527 2.850 5.527 hfx_load_balance_count 1 12.0 2.850 5.494 2.850 5.494 qs_vxc_create 14 8.0 0.000 0.000 2.943 2.943 xc_vxc_pw_create 14 9.0 0.008 0.009 2.943 2.943 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=16.33799999999998, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=80.724, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.417, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.397, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.736, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.575, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.378, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=79.263, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.85, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.85, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.053, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.003, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=31.971, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=5.447, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 79.544 79.544 qs_energies 1 2.0 0.000 0.000 79.174 79.174 mp2_main 1 3.0 0.000 0.000 76.474 76.474 mp2_gpw_main 1 4.0 0.000 0.000 76.372 76.372 rpa_ri_compute_en 1 5.0 0.000 0.000 73.133 73.133 rpa_num_int 1 6.0 0.001 0.001 73.128 73.128 compute_mat_P_omega 1 7.0 0.003 0.003 63.105 63.105 compute_mat_P_omega_contract 10 8.0 8.901 8.901 62.896 62.896 dbt_total 2336 9.6 0.011 0.011 49.067 49.067 dbt_contract 787 11.0 0.033 0.033 42.440 42.440 dbt_tas_total 1149 12.2 0.188 0.188 41.373 41.373 dbt_tas_multiply 807 12.1 0.002 0.002 40.069 40.069 dbt_tas_dbm 807 14.1 0.003 0.003 33.744 33.744 dbm_multiply 807 16.1 33.736 33.736 33.736 33.736 dbt_tas_mm_1N 524 15.1 0.001 0.001 26.055 26.055 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 24.881 24.881 compute_mat_P_omega_calc_M_occ 250 9.0 8.923 8.923 16.614 16.614 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.066 7.066 dbt_tas_mm_2 251 15.0 0.001 0.001 6.119 6.119 dbt_copy 1103 10.7 0.070 0.070 5.306 5.306 compute_QP_energies 1 7.0 0.000 0.000 5.047 5.047 compute_self_energy_cubic_gw 1 8.0 0.054 0.054 5.046 5.046 contract_cubic_gw 21 9.0 0.000 0.000 4.033 4.033 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 3.232 3.232 dbt_tas_reserve_blocks_index 3261 14.3 0.146 0.146 3.172 3.172 dbm_reserve_blocks 3628 15.3 3.087 3.087 3.087 3.087 scf_env_do_scf 1 3.0 0.000 0.000 2.599 2.599 scf_env_do_scf_inner_loop 17 4.0 0.001 0.001 2.599 2.599 dbt_reserve_blocks_index 2280 13.1 0.048 0.048 2.379 2.379 dbt_reserve_blocks_index_array 2222 12.2 0.008 0.008 2.369 2.369 convert_to_new_pgrid 2421 14.1 0.088 0.088 2.332 2.332 dbt_crop 1042 12.0 1.481 1.481 2.272 2.272 dbm_copy 1614 15.1 2.244 2.244 2.244 2.244 dbt_tas_copy 574 11.4 1.329 1.329 2.200 2.200 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.156 2.156 dbt_tas_reshape 367 15.0 0.006 0.006 2.011 2.011 compute_W_cubic_GW 10 7.0 0.012 0.012 1.951 1.951 dbt_reshape 278 11.9 0.928 0.928 1.735 1.735 get_2c_integrals 1 6.0 0.000 0.000 1.720 1.720 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.028 30.186 30.199 qs_energies 1 2.0 0.000 0.000 30.097 30.098 mp2_main 1 3.0 0.000 0.001 29.148 29.149 mp2_gpw_main 1 4.0 0.000 0.000 29.111 29.112 rpa_ri_compute_en 1 5.0 0.000 0.000 27.903 27.904 rpa_num_int 1 6.0 0.000 0.002 27.902 27.903 dbt_total 2336 9.6 0.010 0.011 24.751 24.756 compute_mat_P_omega 1 7.0 0.001 0.005 23.722 23.750 compute_mat_P_omega_contract 10 8.0 0.382 0.409 23.555 23.559 dbt_contract 787 11.0 0.024 0.026 18.722 18.728 dbt_tas_total 1149 12.2 0.047 0.063 16.825 16.827 dbt_tas_multiply 807 12.1 0.002 0.002 16.780 16.783 dbt_tas_dbm 807 14.1 0.003 0.003 12.397 12.401 dbm_multiply 807 16.1 9.587 10.481 9.587 10.481 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.146 7.147 compute_mat_P_omega_calc_M_occ 250 9.0 0.372 0.397 6.934 6.934 dbt_tas_mm_2 251 15.0 0.001 0.001 5.906 5.910 mp_sync 8706 11.6 4.538 5.798 4.538 5.798 dbt_copy 1111 10.7 0.011 0.012 5.263 5.496 dbt_reshape 1098 11.7 1.982 2.267 5.008 5.234 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 4.978 4.979 dbt_tas_mm_1N 524 15.1 0.001 0.001 4.391 4.824 compute_QP_energies 1 7.0 0.000 0.000 2.649 2.650 compute_self_energy_cubic_gw 1 8.0 0.002 0.003 2.646 2.649 mp_waitall_2 3776 15.3 2.362 2.588 2.362 2.588 dbt_communicate_buffer 1098 12.7 0.052 0.063 2.410 2.521 contract_cubic_gw 21 9.0 0.000 0.000 2.080 2.080 dbt_reserve_blocks_index 2849 13.1 0.059 0.065 1.452 1.674 dbt_reserve_blocks_index_array 2791 12.2 0.008 0.010 1.452 1.672 dbt_tas_reserve_blocks_index 3300 14.5 0.112 0.138 1.426 1.645 dbm_reserve_blocks 3696 15.4 1.401 1.621 1.401 1.621 dbt_crop 1042 12.0 0.858 1.102 1.312 1.504 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.204 1.207 dbt_tas_replicate 396 14.1 0.528 0.678 1.074 1.182 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 0.934 0.937 cp_gemm 105 8.4 0.000 0.000 0.929 0.937 cp_gemm_cosma 105 9.4 0.929 0.937 0.929 0.937 scf_env_do_scf 1 3.0 0.000 0.000 0.910 0.910 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 0.910 0.910 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 0.873 0.875 convert_to_new_pgrid 2421 14.1 0.023 0.026 0.723 0.860 dbm_copy 1608 15.1 0.695 0.835 0.695 0.835 dbm_add 807 14.1 0.603 0.697 0.603 0.697 mp_max_i 1992 9.8 0.515 0.686 0.515 0.686 compute_W_cubic_GW 10 7.0 0.001 0.001 0.679 0.686 mp_sum_l 6085 13.0 0.452 0.605 0.452 0.605 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=21.724999999999994, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=33.736, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.923, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.901, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.087, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.244, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=0.928, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=8.866999999999997, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=9.587, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.372, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.382, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.401, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.695, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=1.982, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=4.538, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.362, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 322.844 322.844 qs_forces 1 2.0 0.000 0.000 322.282 322.282 rebuild_ks_matrix 7 6.6 0.000 0.000 320.847 320.847 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 320.847 320.847 hfx_ks_matrix 7 8.6 0.000 0.000 318.874 318.874 dbt_total 4903 11.6 0.027 0.027 263.545 263.545 hfx_ri_update_ks 7 9.6 0.000 0.000 259.070 259.070 hfx_ri_update_ks_Pmat 7 10.6 33.221 33.221 259.067 259.067 dbt_tas_total 2391 14.1 1.588 1.588 238.011 238.011 qs_energies 1 3.0 0.000 0.000 226.251 226.251 scf_env_do_scf 1 4.0 0.000 0.000 225.970 225.970 qs_ks_update_qs_env 8 6.0 0.000 0.000 224.866 224.866 dbt_contract 1473 13.0 0.130 0.130 217.379 217.379 dbt_tas_multiply 1482 14.0 0.004 0.004 207.600 207.600 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.004 194.933 194.933 dbt_tas_dbm 1482 16.0 0.005 0.005 181.578 181.578 dbm_multiply 1482 18.0 181.564 181.564 181.564 181.564 dbt_tas_mm_2 649 17.1 0.004 0.004 166.128 166.128 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 143.161 143.161 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 95.983 95.983 init_scf_loop 2 5.0 0.000 0.000 82.808 82.808 hfx_ri_update_forces 1 7.0 1.588 1.588 59.801 59.801 hfx_ri_forces_Pmat_3c 1 8.0 1.606 1.606 41.352 41.352 dbt_tas_reshape 906 14.4 0.010 0.010 23.858 23.858 dbt_copy 2373 12.3 0.102 0.102 15.716 15.716 dbt_tas_merge 649 14.1 13.092 13.092 14.600 14.600 precalc_derivatives 1 8.0 2.883 2.883 14.088 14.088 dbt_tas_reshape_buffer_fill 906 15.4 13.808 13.808 13.808 13.808 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 11.201 11.201 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 11.072 11.072 dbm_reserve_blocks 8426 16.7 10.242 10.242 10.242 10.242 dbt_tas_mm_3T 659 17.1 0.002 0.002 10.231 10.231 dbt_crop 2763 14.2 6.996 6.996 9.568 9.568 dbt_tas_reserve_blocks_index 7520 15.9 0.331 0.331 9.515 9.515 dbt_reshape 850 13.9 5.288 5.288 9.030 9.030 reshape_mm_small 906 15.6 0.135 0.135 8.367 8.367 dbt_tas_replicate 906 15.6 6.040 6.040 8.281 8.281 build_3c_derivatives 9 9.0 3.328 3.328 7.444 7.444 dbt_tas_reshape_buffer_obtain 906 15.4 6.228 6.228 7.058 7.058 dbt_reserve_blocks_index 5073 15.1 0.114 0.114 6.929 6.929 dbt_reserve_blocks_index_array 5038 14.1 0.016 0.016 6.862 6.862 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.024 54.755 54.763 qs_forces 1 2.0 0.000 0.000 54.525 54.525 rebuild_ks_matrix 7 6.6 0.000 0.000 53.869 53.870 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 53.869 53.870 hfx_ks_matrix 7 8.6 0.000 0.000 52.912 52.923 dbt_total 4903 11.6 0.024 0.028 47.374 47.379 dbt_contract 1473 13.0 0.085 0.092 37.131 37.139 hfx_ri_update_ks 7 9.6 0.000 0.000 37.036 37.036 hfx_ri_update_ks_Pmat 7 10.6 1.339 1.837 37.035 37.035 dbt_tas_total 2391 14.1 0.106 0.128 34.990 34.992 qs_energies 1 3.0 0.000 0.000 33.939 33.940 scf_env_do_scf 1 4.0 0.000 0.001 33.795 33.795 qs_ks_update_qs_env 8 6.0 0.000 0.000 33.297 33.297 dbt_tas_multiply 1482 14.0 0.004 0.005 31.174 31.178 dbt_tas_dbm 1482 16.0 0.004 0.005 24.023 24.044 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.004 20.873 20.874 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 20.574 20.574 dbm_multiply 1482 18.0 16.221 20.492 16.221 20.492 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 19.876 19.876 hfx_ri_update_forces 1 7.0 0.060 0.064 15.876 15.887 dbt_tas_mm_2 649 17.1 0.003 0.003 14.793 14.814 mp_sync 17597 13.5 11.812 14.645 11.812 14.645 init_scf_loop 2 5.0 0.000 0.000 13.919 13.919 hfx_ri_forces_Pmat_3c 1 8.0 0.051 0.063 11.376 11.404 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 7.347 7.348 dbt_copy 2391 12.4 0.027 0.030 5.344 5.829 dbt_crop 2763 14.2 2.834 4.024 3.481 4.698 dbt_tas_mm_3T 659 17.1 0.001 0.002 3.918 4.479 dbt_reshape 1250 13.5 1.842 2.258 3.838 4.124 dbt_tas_mm_3N 163 16.5 0.000 0.000 3.571 3.675 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.634 3.634 precalc_derivatives 1 8.0 0.091 0.125 3.365 3.365 dbt_tas_merge 649 14.1 1.542 2.153 2.626 3.141 mp_waitall_2 5961 16.5 2.637 2.866 2.637 2.866 dbm_reserve_blocks 8460 16.8 1.894 2.260 1.894 2.260 dbt_tas_reserve_blocks_index 7551 16.0 0.257 0.346 1.824 2.224 dbt_tas_replicate 909 15.6 0.576 0.719 2.123 2.184 mp_max_i 3372 12.5 1.789 2.133 1.789 2.133 dbt_tas_communicate_buffer 1825 16.3 0.058 0.075 1.898 2.108 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 1.990 1.997 build_3c_derivatives 9 9.0 0.225 0.330 1.938 1.941 dbt_reserve_blocks_index 5473 15.1 0.105 0.131 1.495 1.792 dbt_reserve_blocks_index_array 5438 14.1 0.012 0.015 1.492 1.789 dbt_tas_reshape 916 14.4 0.007 0.009 1.498 1.569 mp_alltoall_i 4327 15.3 1.391 1.544 1.391 1.544 dbt_communicate_buffer 1250 14.5 0.041 0.056 1.377 1.477 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.001 0.002 1.353 1.354 convert_to_new_pgrid 4446 16.0 0.035 0.042 1.153 1.346 dbm_copy 3043 16.9 1.117 1.307 1.117 1.307 mp_sum_l 38255 15.3 0.957 1.247 0.957 1.247 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=63.92099999999999, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=181.564, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=33.221, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=13.808, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_merge", label="dbt_tas_merge", y=13.092, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=10.242, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=6.996, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=16.476000000000006, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=16.221, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.339, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_merge", label="dbt_tas_merge", y=1.542, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.894, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=2.834, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=11.812, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.637, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 178.053 178.053 qs_energies 1 2.0 0.000 0.000 177.873 177.873 mp2_main 1 3.0 0.000 0.000 173.178 173.178 mp2_gpw_main 1 4.0 0.001 0.001 172.783 172.783 mp2_ri_gpw_compute_in 1 5.0 0.371 0.371 124.813 124.813 mp2_ri_gpw_compute_in_loop 1 6.0 0.009 0.009 113.681 113.681 mp2_eri_3c_integrate_gpw 2656 7.0 0.012 0.012 85.071 85.071 integrate_v_rspace 2666 8.0 0.605 0.605 72.093 72.093 grid_integrate_task_list 2666 9.0 69.506 69.506 69.506 69.506 mp2_ri_gpw_compute_en 1 5.0 0.085 0.085 47.948 47.948 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.364 9.364 46.190 46.190 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.150 2.150 29.464 29.464 offload_gemm 2080 8.0 27.314 27.314 27.314 27.314 dbcsr_multiply_generic 5322 8.0 0.168 0.168 19.707 19.707 ao_to_mo_and_store_B_mult_1 2656 7.0 0.009 0.009 19.687 19.687 calculate_wavefunction 2656 8.0 8.123 8.123 11.738 11.738 pw_transfer 63872 10.6 1.025 1.025 11.334 11.334 get_2c_integrals 1 6.0 0.000 0.000 10.760 10.760 fft_wrap_pw1pw2 53228 11.4 0.109 0.109 10.095 10.095 compute_2c_integrals 1 7.0 0.006 0.006 9.967 9.967 compute_2c_integrals_loop_lm 1 8.0 0.011 0.011 9.948 9.948 mp2_eri_2c_integrate_gpw 1 9.0 3.207 3.207 9.937 9.937 multiply_cannon 5322 9.0 0.416 0.416 9.877 9.877 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.164 2.164 8.823 8.823 multiply_cannon_loop 5322 10.0 0.106 0.106 8.541 8.541 make_m2s 10644 9.0 0.060 0.060 7.737 7.737 make_images 10644 10.0 2.963 2.963 7.453 7.453 copy_dbcsr_to_fm 2679 8.0 0.025 0.025 7.257 7.257 multiply_cannon_multrec 5322 11.0 7.117 7.117 7.153 7.153 fft_wrap_pw1pw2_20 21271 12.4 0.464 0.464 7.101 7.101 fft3d_s 53229 13.4 6.538 6.538 6.570 6.570 mp2_ri_gpw_compute_en_ener 2080 7.0 5.581 5.581 5.581 5.581 dbcsr_complete_redistribute 2689 9.0 1.184 1.184 5.568 5.568 dbcsr_finalize 10708 9.5 0.148 0.148 4.947 4.947 dbcsr_merge_all 8011 10.3 3.367 3.367 4.336 4.336 scf_env_do_scf 1 3.0 0.000 0.000 4.287 4.287 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.286 4.286 potential_pw2rs 5322 10.0 0.139 0.139 3.885 3.885 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.020 39.384 39.394 qs_energies 1 2.0 0.000 0.000 39.313 39.313 mp2_main 1 3.0 0.000 0.001 37.143 37.143 mp2_gpw_main 1 4.0 0.001 0.002 37.026 37.026 mp2_ri_gpw_compute_in 1 5.0 0.050 0.051 17.181 22.461 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 15.826 21.104 mp2_ri_gpw_compute_en 1 5.0 0.165 0.171 19.756 20.267 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 13.842 19.239 integrate_v_rspace 93 8.1 0.097 0.109 13.771 19.063 grid_integrate_task_list 93 9.1 13.405 18.773 13.405 18.773 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.713 0.832 13.805 13.810 mp2_ri_gpw_compute_en_expansio 65 7.0 0.094 0.121 10.417 10.688 offload_gemm 65 8.0 10.323 10.578 10.323 10.578 mp_min_d 2 7.0 5.303 5.821 5.303 5.821 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 5.279 5.790 mp2_ri_gpw_compute_en_comm 17 7.0 0.063 0.092 2.350 2.893 mp_sendrecv_dm3 1054 8.0 1.859 2.585 1.859 2.585 scf_env_do_scf 1 3.0 0.000 0.000 2.042 2.042 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 2.042 2.042 dbcsr_multiply_generic 176 8.0 0.007 0.008 1.706 1.886 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.689 1.868 get_2c_integrals 1 6.0 0.000 0.000 1.286 1.319 qs_scf_new_mos 10 5.0 0.000 0.000 0.993 1.074 multiply_cannon 176 9.0 0.015 0.017 1.007 1.070 compute_2c_integrals 1 7.0 0.003 0.003 1.001 1.022 multiply_cannon_loop 176 10.0 0.002 0.002 0.952 1.015 eigensolver 11 5.8 0.001 0.001 0.973 0.975 compute_2c_integrals_loop_lm 1 8.0 0.002 0.003 0.742 0.943 mp2_eri_2c_integrate_gpw 1 9.0 0.200 0.320 0.740 0.940 multiply_cannon_multrec 246 11.0 0.821 0.856 0.826 0.861 pw_transfer 2120 10.5 0.042 0.052 0.759 0.840 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.820 0.824 cp_fm_redistribute_end 11 7.8 0.309 0.817 0.318 0.818 cp_fm_diag_elpa_base 11 7.8 0.482 0.778 0.494 0.793 make_m2s 352 9.0 0.003 0.003 0.663 0.788 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=56.62899999999999, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=69.506, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=27.314, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.364, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=8.123, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.117, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=6.959999999999994, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.405, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=10.323, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.713, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.821, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=5.303, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.859, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.083 0.083 124.838 124.838 qs_energies 1 2.0 0.000 0.000 123.516 123.516 scf_env_do_scf 1 3.0 0.000 0.000 116.999 116.999 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 116.999 116.999 qs_ks_update_qs_env 15 5.0 0.000 0.000 49.672 49.672 rebuild_ks_matrix 15 6.0 0.000 0.000 49.462 49.462 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 49.462 49.462 qs_scf_new_mos 15 5.0 0.000 0.000 42.411 42.411 eigensolver 15 6.0 0.001 0.001 34.728 34.728 qs_vxc_create 15 8.0 0.039 0.039 34.182 34.182 calculate_dispersion_nonloc 15 9.0 7.046 7.046 29.677 29.677 pw_transfer 1191 10.0 0.057 0.057 22.773 22.773 cp_fm_diag_elpa 15 7.0 0.000 0.000 22.669 22.669 cp_fm_diag_elpa_base 15 8.0 20.042 20.042 22.669 22.669 fft_wrap_pw1pw2 1086 11.0 0.008 0.008 22.601 22.601 qs_rho_update_rho_low 16 5.0 0.000 0.000 21.992 21.992 calculate_rho_elec 16 6.0 0.217 0.217 21.992 21.992 grid_collocate_task_list 16 7.0 20.644 20.644 20.644 20.644 fft_wrap_pw1pw2_150 765 12.0 3.520 3.520 16.355 16.355 sum_up_and_integrate 15 8.0 0.040 0.040 13.796 13.796 integrate_v_rspace 15 9.0 0.019 0.019 13.756 13.756 grid_integrate_task_list 15 10.0 13.244 13.244 13.244 13.244 fft3d_s 1087 13.0 9.879 9.879 9.887 9.887 cp_fm_cholesky_restore 45 7.0 9.786 9.786 9.786 9.786 pw_scatter_s 585 13.1 7.309 7.309 7.309 7.309 fft_wrap_pw1pw2_200 197 12.3 0.758 0.758 6.061 6.061 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.522 5.522 dbcsr_complete_redistribute 46 8.3 2.284 2.284 5.488 5.488 cp_fm_upper_to_full 30 8.0 4.898 4.898 4.898 4.898 vdW_energy 15 10.0 4.521 4.521 4.521 4.521 xc_vxc_pw_create 15 9.0 0.211 0.211 4.466 4.466 gspace_mixing 14 5.0 0.171 0.171 4.111 4.111 broyden_mixing 14 6.0 3.483 3.483 3.483 3.483 init_scf_run 1 3.0 0.000 0.000 3.070 3.070 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.991 2.991 xc_pw_derive 90 11.0 0.001 0.001 2.846 2.846 xc_rho_set_and_dset_create 15 10.0 0.154 0.154 2.538 2.538 calculate_dm_sparse 15 6.0 0.017 0.017 2.507 2.507 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.029 60.460 60.472 qs_energies 1 2.0 0.000 0.000 60.208 60.213 scf_env_do_scf 1 3.0 0.000 0.001 56.104 56.105 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 56.104 56.105 qs_ks_update_qs_env 15 5.0 0.000 0.000 24.169 24.200 rebuild_ks_matrix 15 6.0 0.000 0.000 24.134 24.165 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.003 24.134 24.165 qs_rho_update_rho_low 16 5.0 0.000 0.000 20.349 20.353 calculate_rho_elec 16 6.0 0.007 0.007 20.349 20.353 grid_collocate_task_list 16 7.0 19.141 19.735 19.141 19.735 sum_up_and_integrate 15 8.0 0.005 0.008 13.823 13.897 integrate_v_rspace 15 9.0 0.000 0.001 13.817 13.895 grid_integrate_task_list 15 10.0 12.827 13.320 12.827 13.320 qs_scf_new_mos 15 5.0 0.000 0.000 12.297 12.639 eigensolver 15 6.0 0.001 0.002 11.364 11.400 qs_vxc_create 15 8.0 0.001 0.001 10.022 10.032 calculate_dispersion_nonloc 15 9.0 0.963 1.775 8.248 8.278 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.244 8.248 cp_fm_diag_elpa_base 15 8.0 8.098 8.123 8.241 8.244 pw_transfer 1191 10.0 0.077 0.096 7.635 7.759 fft_wrap_pw1pw2 1086 11.0 0.011 0.013 7.478 7.633 fft3d_ps 1086 13.0 2.369 2.678 5.907 6.197 fft_wrap_pw1pw2_150 765 12.0 0.239 0.291 5.177 5.263 mp_alltoall_z22v 1086 15.0 3.016 3.677 3.016 3.677 cp_fm_cholesky_restore 45 7.0 2.975 3.051 2.975 3.051 yz_to_x 501 13.9 0.197 0.307 2.279 2.643 qs_energies_init_hamiltonians 1 3.0 0.000 0.004 2.470 2.473 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.150 2.349 fft_wrap_pw1pw2_200 197 12.3 0.160 0.197 2.203 2.283 xc_vxc_pw_create 15 9.0 0.014 0.020 1.773 1.803 x_to_yz 585 14.1 0.303 0.339 1.237 1.480 rs_pw_transfer 158 9.4 0.001 0.005 1.162 1.435 init_scf_run 1 3.0 0.000 0.001 1.405 1.406 build_core_ppnl 1 5.0 1.252 1.402 1.252 1.402 vdW_energy 15 10.0 1.312 1.371 1.312 1.371 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.310 1.311 xc_pw_derive 90 11.0 0.001 0.002 1.191 1.268 density_rs2pw 16 7.0 0.001 0.001 1.095 1.250 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=51.242999999999995, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.644, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=20.042, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.244, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.879, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=9.786, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=14.403000000000006, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.141, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.098, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=12.827, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=2.975, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.016, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.077 0.077 281.184 281.184 qs_energies 1 2.0 0.000 0.000 281.046 281.046 ls_scf 1 3.0 0.000 0.000 279.821 279.821 ls_scf_main 1 4.0 0.002 0.002 270.538 270.538 density_matrix_trs4 11 5.0 0.012 0.012 181.592 181.592 arnoldi_extremal 12 6.1 0.000 0.000 97.828 97.828 arnoldi_normal_ev 12 7.1 0.013 0.013 97.828 97.828 build_subspace 23 8.1 0.076 0.076 96.022 96.022 dbcsr_matrix_vector_mult 652 9.0 0.164 0.164 95.763 95.763 dbcsr_matrix_vector_mult_local 652 10.0 94.316 94.316 94.326 94.326 ls_scf_dm_to_ks 11 5.0 0.000 0.000 83.762 83.762 matrix_ls_to_qs 11 6.0 0.000 0.000 80.542 80.542 dbcsr_multiply_generic 185 6.1 0.862 0.862 73.325 73.325 dbcsr_copy_into_existing 11 7.0 44.252 44.252 44.252 44.252 multiply_cannon 185 7.1 0.286 0.286 43.735 43.735 dbcsr_complete_redistribute 23 7.5 29.176 29.176 39.951 39.951 matrix_decluster 11 7.0 0.000 0.000 36.290 36.290 multiply_cannon_loop 185 8.1 0.279 0.279 31.578 31.578 make_m2s 370 7.1 0.038 0.038 24.751 24.751 make_images 370 8.1 10.428 10.428 23.198 23.198 multiply_cannon_multrec 185 9.1 22.002 22.002 22.027 22.027 dbcsr_finalize 646 7.5 0.218 0.218 14.331 14.331 dbcsr_merge_all 597 8.5 2.315 2.315 13.183 13.183 setup_rec_index_2d 370 8.1 11.784 11.784 11.784 11.784 dbcsr_sort_indices 1103 9.9 10.724 10.724 10.724 10.724 tree_to_linear_d 110 9.4 9.644 9.644 9.644 9.644 quick_finalize 395 10.0 0.426 0.426 9.329 9.329 calculate_norms 370 9.1 9.272 9.272 9.272 9.272 dbcsr_special_finalize 370 9.1 0.002 0.002 8.634 8.634 ls_scf_init_scf 1 4.0 0.000 0.000 8.606 8.606 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.279 8.279 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.646 7.646 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.023 63.663 63.673 qs_energies 1 2.0 0.000 0.000 63.574 63.575 ls_scf 1 3.0 0.000 0.000 63.515 63.516 ls_scf_main 1 4.0 0.000 0.007 61.103 61.103 density_matrix_trs4 11 5.0 0.006 0.018 58.528 58.606 dbcsr_multiply_generic 185 6.1 0.056 0.081 54.620 55.022 multiply_cannon 185 7.1 0.031 0.037 45.426 46.223 multiply_cannon_loop 185 8.1 0.106 0.133 43.173 43.929 multiply_cannon_multrec 1480 9.1 26.534 29.772 26.787 30.050 mp_waitall_1 11936 10.3 14.247 18.356 14.247 18.356 multiply_cannon_metrocomm3 1480 9.1 0.012 0.017 8.325 13.446 multiply_cannon_metrocomm1 1480 9.1 0.007 0.010 3.357 7.344 make_m2s 370 7.1 0.033 0.035 6.304 6.380 make_images 370 8.1 0.616 0.701 6.180 6.252 calculate_norms 2960 9.1 4.543 6.131 4.543 6.131 arnoldi_extremal 12 6.1 0.000 0.001 3.064 3.084 arnoldi_normal_ev 12 7.1 0.001 0.003 3.064 3.084 mp_sum_l 1119 5.6 1.898 3.079 1.898 3.079 make_images_data 370 9.1 0.008 0.010 2.773 3.037 build_subspace 23 8.1 0.018 0.024 2.962 2.964 dbcsr_matrix_vector_mult 652 9.0 0.009 0.043 2.158 2.767 hybrid_alltoall_any 393 9.9 0.158 0.717 2.392 2.641 dbcsr_matrix_vector_mult_local 652 10.0 1.670 2.508 1.672 2.510 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.247 2.336 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.002 2.114 dbcsr_complete_redistribute 23 7.5 1.166 1.509 1.855 2.019 matrix_ls_to_qs 11 6.0 0.000 0.000 1.808 1.989 ls_scf_init_scf 1 4.0 0.000 0.000 1.855 1.856 ls_scf_init_matrix_S 1 5.0 0.000 0.000 1.830 1.835 matrix_decluster 11 7.0 0.000 0.000 1.667 1.816 make_images_pack 370 9.1 1.527 1.687 1.530 1.691 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.677 1.678 dbcsr_finalize 646 7.5 0.007 0.008 1.279 1.442 buffer_matrices_ensure_size 370 8.1 1.194 1.430 1.194 1.430 dbcsr_data_release 12861 10.1 0.909 1.358 0.909 1.358 mp_sum_d 1403 6.7 0.997 1.301 0.997 1.301 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=70.38200000000003, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=94.316, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=44.252, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=29.176, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=22.002, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=11.784, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.272, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=13.604999999999997, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.67, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.166, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=26.534, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.543, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=1.898, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=14.247, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 70.492 70.492 lib_test 1 2.0 0.000 0.000 70.484 70.484 dbcsr_run_tests 3 3.0 0.002 0.002 70.484 70.484 test_multiplies_multiproc 3 4.0 0.001 0.001 55.001 55.001 dbcsr_redistribute 9 5.0 35.776 35.776 37.328 37.328 dbcsr_multiply_generic 9 5.0 0.001 0.001 15.928 15.928 dbcsr_make_random_matrix 9 4.0 12.454 12.454 15.376 15.376 multiply_cannon 9 6.0 0.001 0.001 11.290 11.290 multiply_cannon_loop 9 7.0 0.032 0.032 10.935 10.935 multiply_cannon_multrec 9 8.0 10.903 10.903 10.904 10.904 dbcsr_finalize 27 5.7 0.022 0.022 5.397 5.397 dbcsr_merge_all 18 6.5 1.925 1.925 4.667 4.667 dbcsr_data_release 975 7.6 2.779 2.779 2.779 2.779 tree_to_linear_d 9 7.0 1.834 1.834 1.834 1.834 make_m2s 18 6.0 0.001 0.001 1.588 1.588 make_images 18 7.0 0.552 0.552 1.541 1.541 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.010 17.735 17.739 lib_test 1 2.0 0.000 0.000 17.708 17.726 dbcsr_run_tests 3 3.0 0.000 0.001 17.707 17.725 test_multiplies_multiproc 3 4.0 0.000 0.002 16.878 16.930 dbcsr_multiply_generic 9 5.0 0.001 0.001 14.964 15.074 multiply_cannon 9 6.0 0.001 0.002 13.259 13.563 multiply_cannon_loop 9 7.0 0.002 0.002 12.985 13.318 multiply_cannon_multrec 72 8.0 10.743 11.297 10.744 11.297 mp_waitall_1 576 9.2 2.530 3.547 2.530 3.547 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 1.984 2.938 dbcsr_make_random_matrix 9 4.0 0.671 0.935 0.802 1.031 dbcsr_data_release 444 7.6 0.787 0.894 0.787 0.894 mp_sum_l 390 2.5 0.413 0.854 0.413 0.854 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.405 0.846 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.249 0.820 dbcsr_finalize 27 5.7 0.000 0.000 0.678 0.765 dbcsr_destroy 111 5.9 0.000 0.000 0.635 0.736 make_m2s 18 6.0 0.001 0.001 0.627 0.670 make_images 18 7.0 0.021 0.026 0.624 0.666 dbcsr_merge_all 18 6.5 0.096 0.116 0.525 0.615 dbcsr_checksum 6 5.0 0.152 0.516 0.524 0.524 dbcsr_redistribute 9 5.0 0.225 0.273 0.381 0.415 make_images_data 18 8.0 0.000 0.001 0.325 0.404 mp_sum_d 191 1.2 0.372 0.391 0.372 0.391 dbcsr_data_copy_aa2 18 7.5 0.319 0.381 0.319 0.381 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.655000000000001, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=35.776, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.454, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=10.903, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.779, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=1.925, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.269999999999998, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.225, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.671, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=10.743, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.787, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.096, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.53, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.413, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.046 0.046 131.766 131.766 qs_mol_dyn_low 1 2.0 0.003 0.003 130.458 130.458 velocity_verlet 5 3.0 0.003 0.003 106.297 106.297 qmmm_el_coupling 6 3.8 0.000 0.000 86.731 86.731 qmmm_elec_with_gaussian 6 4.8 0.091 0.091 86.728 86.728 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 85.703 85.703 qmmm_elec_gaussian_low_G 6 6.8 84.783 84.783 84.783 84.783 qs_forces 6 3.8 0.000 0.000 35.188 35.188 qs_energies 6 4.8 0.000 0.000 31.225 31.225 scf_env_do_scf 6 5.8 0.001 0.001 29.038 29.038 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 25.403 25.403 rebuild_ks_matrix 45 8.4 0.000 0.000 24.673 24.673 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 24.672 24.672 qs_ks_update_qs_env 45 7.8 0.000 0.000 21.130 21.130 pw_transfer 966 12.3 0.049 0.049 17.070 17.070 fft_wrap_pw1pw2 801 13.6 0.006 0.006 16.862 16.862 fft_wrap_pw1pw2_150 507 15.2 2.219 2.219 16.454 16.454 qs_vxc_create 45 10.4 0.001 0.001 13.563 13.563 xc_vxc_pw_create 45 11.4 0.694 0.694 13.563 13.563 xc_pw_derive 270 13.4 0.002 0.002 9.294 9.294 fft3d_s 802 15.6 7.740 7.740 7.747 7.747 xc_rho_set_and_dset_create 45 12.4 0.683 0.683 7.024 7.024 qs_rho_update_rho_low 45 7.9 0.000 0.000 6.965 6.965 calculate_rho_elec 45 8.9 0.563 0.563 6.964 6.964 xc_pw_divergence 45 12.4 0.001 0.001 5.745 5.745 pw_scatter_s 429 15.8 5.644 5.644 5.644 5.644 qmmm_forces 6 3.8 0.001 0.001 5.392 5.392 qmmm_forces_with_gaussian 6 4.8 0.122 0.122 5.059 5.059 pw_integral_ab 2539 7.4 4.388 4.388 4.388 4.388 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.230 4.230 qs_ks_ddapc 45 10.4 0.001 0.001 4.074 4.074 init_scf_loop 6 6.8 0.000 0.000 3.615 3.615 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.549 3.549 qmmm_forces_gaussian_low_G 6 6.8 3.521 3.521 3.521 3.521 density_rs2pw 45 9.9 0.001 0.001 3.239 3.239 grid_collocate_task_list 45 9.9 3.163 3.163 3.163 3.163 pw_poisson_solve 51 9.9 1.293 1.293 3.039 3.039 sum_up_and_integrate 45 10.4 0.114 0.114 2.965 2.965 integrate_v_rspace 45 11.4 0.006 0.006 2.851 2.851 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.038 0.055 54.851 54.861 qs_mol_dyn_low 1 2.0 0.003 0.004 53.678 53.734 qs_forces 6 3.8 0.000 0.001 38.252 38.253 qs_energies 6 4.8 0.000 0.000 36.463 36.463 scf_env_do_scf 6 5.8 0.000 0.001 35.520 35.520 scf_env_do_scf_inner_loop 113 6.2 0.002 0.015 34.100 34.101 rebuild_ks_matrix 119 8.1 0.000 0.000 24.761 24.773 qs_ks_build_kohn_sham_matrix 119 9.1 0.014 0.017 24.761 24.773 qs_ks_update_qs_env 119 7.3 0.001 0.001 23.286 23.298 velocity_verlet 5 3.0 0.002 0.003 22.814 22.817 pw_transfer 2446 12.3 0.167 0.192 15.756 16.201 fft_wrap_pw1pw2 2059 13.4 0.020 0.023 15.382 15.892 fft_wrap_pw1pw2_150 1321 14.9 1.097 1.367 14.781 15.198 fft3d_ps 2059 15.4 6.262 7.231 11.538 12.617 qs_vxc_create 119 10.1 0.002 0.002 12.446 12.450 xc_vxc_pw_create 119 11.1 0.143 0.214 12.444 12.448 qs_rho_update_rho_low 119 7.3 0.000 0.001 10.465 10.469 calculate_rho_elec 119 8.3 0.049 0.055 10.464 10.468 xc_pw_derive 714 13.1 0.009 0.012 9.341 9.635 sum_up_and_integrate 119 10.1 0.048 0.061 8.948 9.167 integrate_v_rspace 119 11.1 0.003 0.004 8.900 9.133 qmmm_forces 6 3.8 0.002 0.002 7.584 7.584 qmmm_forces_with_gaussian 6 4.8 0.362 0.440 7.124 7.437 rs_pw_transfer 988 11.5 0.010 0.014 6.963 7.297 qmmm_el_coupling 6 3.8 0.000 0.000 6.902 7.074 qmmm_elec_with_gaussian 6 4.8 0.383 0.438 6.900 7.072 xc_rho_set_and_dset_create 119 12.1 0.337 0.570 6.101 6.682 xc_pw_divergence 119 12.1 0.004 0.006 6.005 6.272 density_rs2pw 119 9.3 0.005 0.006 5.994 6.237 mp_alltoall_z22v 2059 17.4 4.003 5.728 4.003 5.728 potential_pw2rs 119 12.1 0.005 0.007 5.275 5.304 grid_collocate_task_list 119 9.3 4.300 4.555 4.300 4.555 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.838 4.069 grid_integrate_task_list 119 12.1 3.387 3.798 3.387 3.798 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.408 3.663 x_to_yz 1095 16.8 0.734 0.848 2.780 3.527 mp_waitany 4028 12.8 2.545 3.502 2.545 3.502 yz_to_x 964 16.0 0.486 0.730 2.443 3.484 qmmm_forces_gaussian_low_G 6 6.8 3.161 3.393 3.161 3.393 qmmm_elec_gaussian_low_G 6 6.8 2.820 3.077 2.820 3.077 rs_pw_transfer_PW2RS_150 125 13.9 1.124 1.426 2.726 2.807 rs_pw_transfer_RS2PW_150 125 11.2 0.875 1.201 2.339 2.661 pw_restrict_s3 18 5.8 1.304 1.509 2.299 2.518 dbcsr_multiply_generic 2588 12.3 0.056 0.068 2.082 2.129 qs_scf_new_mos 113 7.2 0.000 0.000 2.116 2.123 qs_scf_loop_do_ot 113 8.2 0.000 0.000 2.116 2.123 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 1.958 2.105 pw_prolongate_s3 18 6.8 1.219 1.367 1.958 2.105 mp_waitall_1 188862 16.2 1.715 2.102 1.715 2.102 ot_scf_mini 113 9.2 0.001 0.001 2.026 2.030 mp_sum_dm3 33 5.7 1.681 1.860 1.681 1.860 qs_ks_ddapc 119 10.1 0.002 0.002 1.739 1.838 pw_scatter_p 1095 15.8 1.540 1.572 1.540 1.572 pw_gather_p 964 15.0 1.160 1.551 1.160 1.551 mp_sum_d 5820 12.2 0.854 1.507 0.854 1.507 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.484 1.485 pw_integral_ab 2761 7.7 0.974 1.135 1.278 1.443 init_scf_loop 6 6.8 0.000 0.000 1.417 1.417 ot_mini 113 10.2 0.000 0.001 1.252 1.258 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=22.526999999999987, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=84.783, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.74, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.644, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.388, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.521, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.163, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=29.944, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.82, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=0.974, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.161, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.3, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.262, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.387, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.003, yerr=0.0 Summary: Performance test took 34 minutes. Status: OK Removing intermediate container e6c9cb884919 ---> 4128dd241d47 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 1a9025c45dcb Removing intermediate container 1a9025c45dcb ---> 0462af937b63 Step 42/42 : ENTRYPOINT [] ---> Running in 60b3ac412b41 Removing intermediate container 60b3ac412b41 ---> 6d7897c44156 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 6d7897c44156 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-09-10 11:56:49+00:00