StartDate: 2022-06-21 19:05:58+00:00 CpuId: 32x AMD (unknown model) [Zen 3], 7nm (SMT disabled) CommitSHA: 2a94081f0f2b5ee2cb829decdaaf3cbd3aa8810b CommitTime: 2022-06-21 19:46:55 +0200 CommitAuthor: Matthias Krack CommitSubject: Read cell parameters from REFTRAJ file generated with dumpdcd Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=2a94081f0f2b5ee2cb829decdaaf3cbd3aa8810b Sending build context to Docker daemon 363.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 405f018f9d1d: Already exists Digest: sha256:b6b83d3c331794420340093eb706a6f152d9c1fa51b262d9bf34594887c2c7ac Status: Downloaded newer image for ubuntu:22.04 ---> 27941809078c Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 92ee757f28a3 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> a398cc4ae5b3 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> e20712e9c254 Step 5/42 : RUN mkdir scripts ---> Using cache ---> c7b9413ca6be Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 5f4bcd2de9f5 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> c39d97839810 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --with-gcc=system --dry-run ---> Using cache ---> d0b21d05b338 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 6e616c7670ff Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 5ba44cd61a38 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 1aa896c19a24 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> c7ccbf5e1b85 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 9cb6a1bd2cd3 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 1eed70bdd06a Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> ac04ff4ae473 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> cd0e2369620a Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 1b4ef27dc823 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 8c2ce1cbdb23 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> c08a0bc6f0ef Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 377bc99f74ae Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 415f340401e7 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 46d9c769ebc6 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 644fb72bd2a2 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 6c6ae39cc95a Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> e6a1ecc8aad2 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 87371125159a Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> ff89f8997806 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> a80648d1a85e Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 10306dcf3c6a Step 30/42 : COPY ./Makefile . ---> Using cache ---> 08c68107fb5d Step 31/42 : COPY ./src ./src ---> c774afcec82e Step 32/42 : COPY ./exts ./exts ---> 8d453f81b9ad Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> e493c0a17deb Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in 414e5b337d69 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container 414e5b337d69 ---> daee0e74cb69 Step 35/42 : COPY ./data ./data ---> a64acc72864f Step 36/42 : COPY ./tests ./tests ---> 1334f9c4cd9b Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 059eef8f7dba Step 38/42 : COPY ./benchmarks ./benchmarks ---> 21a7bc0c5ece Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 0d8ffda81f39 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 795851dbf5c8 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 184.204 184.204 qs_mol_dyn_low 1 2.0 0.002 0.002 183.580 183.580 qs_forces 11 3.9 0.001 0.001 183.543 183.543 qs_energies 11 4.9 0.001 0.001 177.277 177.277 scf_env_do_scf 11 5.9 0.001 0.001 165.447 165.447 velocity_verlet 10 3.0 0.002 0.002 128.529 128.529 init_scf_loop 11 6.9 0.000 0.000 91.490 91.490 prepare_preconditioner 11 7.9 0.000 0.000 88.996 88.996 make_preconditioner 11 8.9 0.000 0.000 88.996 88.996 make_full_inverse_cholesky 11 9.9 0.000 0.000 87.928 87.928 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 73.828 73.828 cp_fm_cholesky_invert 11 10.9 61.495 61.495 61.495 61.495 qs_scf_new_mos 108 7.5 0.001 0.001 38.860 38.860 qs_scf_loop_do_ot 108 8.5 0.001 0.001 38.860 38.860 ot_scf_mini 108 9.5 0.002 0.002 37.458 37.458 cp_fm_cholesky_decompose 22 10.9 22.689 22.689 22.689 22.689 rebuild_ks_matrix 119 8.3 0.001 0.001 20.955 20.955 qs_ks_build_kohn_sham_matrix 119 9.3 0.012 0.012 20.954 20.954 qs_ot_get_p 119 10.4 0.001 0.001 20.032 20.032 dbcsr_multiply_generic 2286 12.5 0.166 0.166 19.830 19.830 qs_rho_update_rho 119 7.7 0.001 0.001 19.595 19.595 calculate_rho_elec 119 8.7 0.956 0.956 19.594 19.594 qs_ks_update_qs_env 119 7.6 0.001 0.001 19.148 19.148 qs_ot_p2m_diag 50 11.0 0.155 0.155 18.688 18.688 cp_dbcsr_syevd 50 12.0 0.002 0.002 18.296 18.296 cp_fm_diag_elpa 50 13.0 0.000 0.000 17.167 17.167 cp_fm_diag_elpa_base 50 14.0 17.106 17.106 17.166 17.166 grid_collocate_task_list 119 9.7 15.123 15.123 15.123 15.123 ot_mini 108 10.5 0.001 0.001 13.966 13.966 sum_up_and_integrate 119 10.3 0.198 0.198 13.162 13.162 integrate_v_rspace 119 11.3 0.092 0.092 12.963 12.963 grid_integrate_task_list 119 12.3 10.908 10.908 10.908 10.908 make_m2s 4572 13.5 0.046 0.046 10.705 10.705 qs_ot_get_derivative 108 11.5 0.001 0.001 8.512 8.512 pw_transfer 1439 11.6 0.055 0.055 6.278 6.278 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.239 6.239 dbcsr_make_dense_low 5837 15.5 0.077 0.077 6.033 6.033 fft_wrap_pw1pw2 1201 12.6 0.006 0.006 6.026 6.026 make_dense_data 5837 16.5 5.344 5.344 5.942 5.942 ot_diis_step 108 11.5 0.004 0.004 5.451 5.451 make_images 4572 14.5 2.095 2.095 5.194 5.194 dbcsr_make_images_dense 3978 14.8 0.018 0.018 5.188 5.188 fft_wrap_pw1pw2_140 487 13.2 0.479 0.479 5.186 5.186 multiply_cannon 2286 13.5 0.171 0.171 5.177 5.177 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.970 4.970 apply_single 119 13.6 0.000 0.000 4.970 4.970 multiply_cannon_loop 2286 14.5 0.091 0.091 4.699 4.699 init_scf_run 11 5.9 0.002 0.002 4.671 4.671 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.669 4.669 multiply_cannon_multrec 2286 15.5 4.553 4.553 4.606 4.606 wfi_extrapolate 11 7.9 0.001 0.001 4.130 4.130 dbcsr_copy 2102 12.0 0.235 0.235 3.763 3.763 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.022 50.545 50.556 qs_mol_dyn_low 1 2.0 0.004 0.005 50.438 50.443 qs_forces 11 3.9 0.001 0.002 50.392 50.392 qs_energies 11 4.9 0.001 0.001 46.858 46.859 scf_env_do_scf 11 5.9 0.001 0.002 42.993 42.993 scf_env_do_scf_inner_loop 108 6.5 0.003 0.018 39.732 39.732 velocity_verlet 10 3.0 0.001 0.003 30.272 30.274 rebuild_ks_matrix 119 8.3 0.000 0.001 19.401 19.452 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.021 19.400 19.451 qs_ks_update_qs_env 119 7.6 0.001 0.001 17.284 17.331 sum_up_and_integrate 119 10.3 0.018 0.023 14.815 14.887 integrate_v_rspace 119 11.3 0.004 0.005 14.797 14.870 dbcsr_multiply_generic 2286 12.5 0.070 0.084 13.904 14.683 qs_rho_update_rho 119 7.7 0.001 0.001 14.587 14.604 calculate_rho_elec 119 8.7 0.030 0.031 14.587 14.603 grid_collocate_task_list 119 9.7 9.398 11.247 9.398 11.247 grid_integrate_task_list 119 12.3 8.459 11.131 8.459 11.131 qs_scf_new_mos 108 7.5 0.001 0.001 10.950 11.004 qs_scf_loop_do_ot 108 8.5 0.001 0.001 10.949 11.003 multiply_cannon 2286 13.5 0.137 0.146 10.114 10.396 ot_scf_mini 108 9.5 0.002 0.002 10.287 10.328 multiply_cannon_loop 2286 14.5 0.087 0.117 9.535 9.816 mp_waitall_1 169478 16.3 8.733 9.327 8.733 9.327 rs_pw_transfer 974 11.9 0.011 0.014 6.056 6.516 multiply_cannon_metrocomm3 18288 15.5 0.036 0.049 5.666 6.306 ot_mini 108 10.5 0.001 0.001 5.984 6.021 density_rs2pw 119 9.7 0.005 0.006 4.808 5.261 potential_pw2rs 119 12.3 0.007 0.009 3.552 3.596 multiply_cannon_multrec 18288 15.5 2.992 3.514 3.001 3.526 mp_alltoall_d11v 2130 13.8 2.997 3.386 2.997 3.386 mp_waitany 9880 13.7 2.820 3.355 2.820 3.355 pw_transfer 1439 11.6 0.086 0.094 3.286 3.342 init_scf_loop 11 6.9 0.000 0.000 3.247 3.247 qs_ot_get_derivative 108 11.5 0.001 0.001 3.145 3.186 fft_wrap_pw1pw2 1201 12.6 0.009 0.011 3.134 3.179 rs_gather_matrices 119 12.3 0.083 0.091 2.746 3.148 rs_pw_transfer_RS2PW_140 130 11.5 0.266 0.316 2.420 2.883 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.744 2.807 apply_single 119 13.6 0.000 0.000 2.743 2.806 ot_diis_step 108 11.5 0.003 0.004 2.802 2.802 fft_wrap_pw1pw2_140 487 13.2 0.250 0.286 2.618 2.696 init_scf_run 11 5.9 0.000 0.004 2.639 2.639 scf_env_initial_rho_setup 11 6.9 0.000 0.005 2.638 2.639 make_m2s 4572 13.5 0.045 0.057 2.468 2.560 fft3d_ps 1201 14.6 1.137 1.210 2.266 2.425 wfi_extrapolate 11 7.9 0.001 0.001 2.394 2.395 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.274 2.283 make_images 4572 14.5 0.118 0.146 2.146 2.241 rs_pw_transfer_PW2RS_140 130 13.9 0.516 0.615 1.495 1.565 mp_sum_l 11218 13.2 0.881 1.544 0.881 1.544 qs_ot_get_p 119 10.4 0.001 0.001 1.324 1.383 mp_sum_d 4129 12.0 1.121 1.381 1.121 1.381 make_images_data 4572 15.5 0.034 0.044 1.185 1.324 mp_alltoall_z22v 1201 16.6 0.896 1.248 0.896 1.248 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.901 1.231 prepare_preconditioner 11 7.9 0.000 0.000 1.166 1.180 make_preconditioner 11 8.9 0.000 0.001 1.166 1.180 hybrid_alltoall_any 4725 16.4 0.058 0.164 1.007 1.152 multiply_cannon_metrocomm1 18288 15.5 0.018 0.027 0.587 1.139 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.056 1.075 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.005 1.039 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 1.008 1.029 qs_energies_init_hamiltonians 11 5.9 0.000 0.002 1.017 1.018 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=52.33000000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=61.495, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=22.689, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=17.106, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.123, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.908, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.553, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=17.966, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.398, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.459, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.992, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.733, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=2.997, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.027 0.027 209.736 209.736 qs_mol_dyn_low 1 2.0 0.002 0.002 209.090 209.090 qs_forces 11 3.9 0.001 0.001 209.041 209.041 qs_energies 11 4.9 0.001 0.001 201.259 201.259 scf_env_do_scf 11 5.9 0.001 0.001 187.119 187.119 velocity_verlet 10 3.0 0.002 0.002 146.593 146.593 init_scf_loop 11 6.9 0.000 0.000 95.145 95.145 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 91.843 91.843 prepare_preconditioner 11 7.9 0.000 0.000 91.168 91.168 make_preconditioner 11 8.9 0.000 0.000 91.168 91.168 make_full_inverse_cholesky 11 9.9 0.000 0.000 90.003 90.003 cp_fm_cholesky_invert 11 10.9 62.234 62.234 62.234 62.234 qs_scf_new_mos 96 7.5 0.001 0.001 36.613 36.613 qs_scf_loop_do_ot 96 8.5 0.001 0.001 36.612 36.612 ot_scf_mini 96 9.5 0.002 0.002 35.332 35.332 rebuild_ks_matrix 107 8.3 0.001 0.001 33.758 33.758 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 33.758 33.758 qs_rho_update_rho 107 7.7 0.001 0.001 31.579 31.579 calculate_rho_elec 107 8.7 0.852 0.852 31.579 31.579 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.389 30.389 grid_collocate_task_list 107 9.7 27.525 27.525 27.525 27.525 sum_up_and_integrate 107 10.3 0.174 0.174 26.732 26.732 integrate_v_rspace 107 11.3 0.089 0.089 26.558 26.558 grid_integrate_task_list 107 12.3 24.730 24.730 24.730 24.730 cp_fm_cholesky_decompose 22 10.9 23.666 23.666 23.666 23.666 qs_ot_get_p 107 10.4 0.001 0.001 19.482 19.482 qs_ot_p2m_diag 44 11.0 0.137 0.137 18.325 18.325 cp_dbcsr_syevd 44 12.0 0.002 0.002 17.969 17.969 dbcsr_multiply_generic 1966 12.4 0.141 0.141 17.921 17.921 cp_fm_diag_elpa 44 13.0 0.000 0.000 17.003 17.003 cp_fm_diag_elpa_base 44 14.0 16.949 16.949 17.003 17.003 ot_mini 96 10.5 0.001 0.001 12.611 12.611 make_m2s 3932 13.4 0.041 0.041 9.630 9.630 qs_ot_get_derivative 96 11.5 0.001 0.001 7.829 7.829 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.407 7.407 init_scf_run 11 5.9 0.002 0.002 5.743 5.743 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.742 5.742 pw_transfer 1295 11.6 0.050 0.050 5.551 5.551 fft_wrap_pw1pw2 1081 12.6 0.005 0.005 5.316 5.316 dbcsr_make_dense_low 4961 15.5 0.058 0.058 5.275 5.275 make_dense_data 4961 16.5 4.645 4.645 5.205 5.205 wfi_extrapolate 11 7.9 0.001 0.001 5.121 5.121 make_images 3932 14.4 1.869 1.869 4.800 4.800 ot_diis_step 96 11.5 0.003 0.003 4.779 4.779 multiply_cannon 1966 13.4 0.163 0.163 4.710 4.710 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.554 4.554 fft_wrap_pw1pw2_140 439 13.2 0.400 0.400 4.543 4.543 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.361 4.361 apply_single 107 13.6 0.000 0.000 4.361 4.361 multiply_cannon_loop 1966 14.4 0.067 0.067 4.288 4.288 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.279 4.279 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.228 4.228 multiply_cannon_multrec 1966 15.4 4.175 4.175 4.221 4.221 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.020 82.767 82.776 qs_mol_dyn_low 1 2.0 0.003 0.004 82.662 82.666 qs_forces 11 3.9 0.001 0.001 82.621 82.621 qs_energies 11 4.9 0.001 0.001 77.011 77.013 scf_env_do_scf 11 5.9 0.000 0.002 71.262 71.263 scf_env_do_scf_inner_loop 96 6.5 0.002 0.017 66.071 66.071 velocity_verlet 10 3.0 0.001 0.003 49.328 49.329 rebuild_ks_matrix 107 8.3 0.000 0.000 36.363 36.423 qs_ks_build_kohn_sham_matrix 107 9.3 0.012 0.018 36.363 36.423 sum_up_and_integrate 107 10.3 0.015 0.017 32.354 32.404 integrate_v_rspace 107 11.3 0.004 0.004 32.339 32.389 qs_ks_update_qs_env 107 7.6 0.001 0.001 32.138 32.192 qs_rho_update_rho 107 7.7 0.001 0.001 31.161 31.169 calculate_rho_elec 107 8.7 0.026 0.027 31.160 31.168 grid_integrate_task_list 107 12.3 22.858 29.096 22.858 29.096 grid_collocate_task_list 107 9.7 22.131 28.146 22.131 28.146 dbcsr_multiply_generic 1966 12.4 0.060 0.073 12.357 12.446 rs_pw_transfer 878 11.9 0.009 0.011 9.857 10.665 qs_scf_new_mos 96 7.5 0.001 0.001 9.652 9.699 qs_scf_loop_do_ot 96 8.5 0.001 0.001 9.651 9.699 density_rs2pw 107 9.7 0.004 0.005 8.697 9.512 multiply_cannon 1966 13.4 0.119 0.136 9.125 9.381 ot_scf_mini 96 9.5 0.002 0.002 9.057 9.106 multiply_cannon_loop 1966 14.4 0.080 0.108 8.590 8.801 mp_waitall_1 146670 16.2 7.837 8.499 7.837 8.499 mp_waitany 8968 13.7 6.960 7.784 6.960 7.784 mp_alltoall_d11v 1998 13.7 6.567 7.514 6.567 7.514 rs_pw_transfer_RS2PW_140 118 11.5 0.220 0.250 6.585 7.420 rs_gather_matrices 107 12.3 0.065 0.074 6.332 7.251 multiply_cannon_metrocomm3 15728 15.4 0.033 0.045 5.109 5.689 ot_mini 96 10.5 0.001 0.001 5.321 5.370 init_scf_loop 11 6.9 0.000 0.000 5.179 5.179 init_scf_run 11 5.9 0.000 0.004 4.486 4.486 scf_env_initial_rho_setup 11 6.9 0.000 0.003 4.486 4.486 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.371 4.379 wfi_extrapolate 11 7.9 0.001 0.001 4.072 4.072 multiply_cannon_multrec 15728 15.4 2.686 3.196 2.695 3.207 potential_pw2rs 107 12.3 0.006 0.007 3.110 3.143 pw_transfer 1295 11.6 0.078 0.084 2.901 2.934 fft_wrap_pw1pw2 1081 12.6 0.007 0.009 2.766 2.817 qs_ot_get_derivative 96 11.5 0.001 0.001 2.749 2.791 ot_diis_step 96 11.5 0.003 0.004 2.541 2.542 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.479 2.529 apply_single 107 13.6 0.000 0.000 2.479 2.529 fft_wrap_pw1pw2_140 439 13.2 0.212 0.246 2.327 2.381 make_m2s 3932 13.4 0.039 0.049 2.142 2.212 fft3d_ps 1081 14.6 0.997 1.070 1.997 2.140 make_images 3932 14.4 0.103 0.125 1.859 1.934 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=54.63199999999998, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=62.234, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=27.525, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.73, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=23.666, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=16.949, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=16.414, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.131, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.858, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=6.567, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.837, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=6.96, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.180 0.180 149.911 149.911 qs_energies 1 2.0 0.000 0.000 149.087 149.087 scf_env_do_scf 1 3.0 0.000 0.000 147.576 147.576 qs_ks_update_qs_env 8 5.0 0.000 0.000 112.517 112.517 rebuild_ks_matrix 7 6.0 0.000 0.000 112.464 112.464 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 112.464 112.464 hfx_ks_matrix 7 8.0 0.000 0.000 94.707 94.707 integrate_four_center 7 9.0 1.264 1.264 94.653 94.653 integrate_four_center_main 7 10.0 0.751 0.751 82.999 82.999 init_scf_loop 1 4.0 0.000 0.000 82.516 82.516 integrate_four_center_bin 454 11.0 82.248 82.248 82.248 82.248 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 65.048 65.048 prepare_preconditioner 1 5.0 0.000 0.000 29.424 29.424 make_preconditioner 1 6.0 0.000 0.000 29.424 29.424 arnoldi_normal_ev 11 9.3 0.001 0.001 16.664 16.664 estimate_cond_num 1 7.0 0.000 0.000 16.619 16.619 build_subspace 28 9.5 0.009 0.009 16.266 16.266 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 10.199 10.199 integrate_four_center_load 7 10.0 0.000 0.000 10.143 10.143 hfx_load_balance 1 11.0 0.011 0.011 10.142 10.142 admm_fit_mo_coeffs 7 9.0 0.000 0.000 9.005 9.005 cp_fm_cholesky_invert 2 9.5 8.461 8.461 8.461 8.461 make_full_inverse_cholesky 1 7.0 0.000 0.000 8.425 8.425 dbcsr_sym_m_v_mult 562 10.0 0.014 0.014 7.513 7.513 DGKS_ortho_d 673 10.6 7.039 7.039 7.040 7.040 Gram_Schmidt_ortho_d 673 10.6 5.689 5.689 5.690 5.690 hfx_load_balance_bin 1 12.0 5.540 5.540 5.540 5.540 purify_mo_diag 7 10.0 0.000 0.000 4.804 4.804 hfx_load_balance_count 1 12.0 4.575 4.575 4.575 4.575 cp_fm_syevd 7 11.0 0.000 0.000 4.309 4.309 cp_fm_syevd_base 7 12.0 4.309 4.309 4.309 4.309 qs_scf_new_mos 7 5.0 0.000 0.000 4.291 4.291 qs_scf_loop_do_ot 7 6.0 0.000 0.000 4.291 4.291 make_full_single_inverse 1 7.0 0.000 0.000 4.207 4.207 ot_scf_mini 7 7.0 0.000 0.000 4.205 4.205 fit_mo_coeffs 7 10.0 0.000 0.000 4.200 4.200 arnoldi_generalized_ev 1 8.0 0.000 0.000 4.122 4.122 gev_build_subspace 4 9.0 0.004 0.004 3.956 3.956 qs_vxc_create 14 8.0 0.000 0.000 3.774 3.774 xc_vxc_pw_create 14 9.0 0.125 0.125 3.774 3.774 dbcsr_copy 1318 10.8 0.842 0.842 3.601 3.601 cp_fm_cholesky_decompose 3 8.7 3.103 3.103 3.103 3.103 dbcsr_create_new 3176 12.1 2.086 2.086 3.079 3.079 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.199 0.222 135.390 135.404 qs_energies 1 2.0 0.000 0.000 135.048 135.055 scf_env_do_scf 1 3.0 0.000 0.000 134.656 134.656 qs_ks_update_qs_env 8 5.0 0.000 0.000 132.496 132.496 rebuild_ks_matrix 7 6.0 0.000 0.000 132.487 132.487 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 132.487 132.487 hfx_ks_matrix 7 8.0 0.000 0.000 126.212 126.213 integrate_four_center 7 9.0 0.052 0.336 126.203 126.205 integrate_four_center_main 7 10.0 0.003 0.004 80.788 113.955 integrate_four_center_bin 448 11.0 80.786 113.951 80.786 113.951 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 76.167 76.167 init_scf_loop 1 4.0 0.000 0.000 58.488 58.488 mp_sync 70 11.3 33.194 35.469 33.194 35.469 integrate_four_center_load 7 10.0 0.000 0.000 11.537 11.541 hfx_load_balance 1 11.0 0.001 0.001 11.537 11.541 mp_sum_l 1135 8.3 5.869 6.144 5.869 6.144 hfx_load_balance_dist 1 12.0 0.000 0.000 5.742 6.009 hfx_load_balance_bin 1 12.0 2.859 5.769 2.859 5.769 hfx_load_balance_count 1 12.0 2.857 5.688 2.857 5.688 qs_vxc_create 14 8.0 0.000 0.001 3.003 3.003 xc_vxc_pw_create 14 9.0 0.008 0.010 3.003 3.003 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=36.35899999999998, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=82.248, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=8.461, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=7.039, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="Gram_Schmidt_ortho_d", label="Gram_Schmidt_ortho_d", y=5.689, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.54, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.575, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.824999999999989, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=80.786, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="Gram_Schmidt_ortho_d", label="Gram_Schmidt_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.859, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.857, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=5.869, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=33.194, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 155.093 155.093 qs_energies 1 2.0 0.000 0.000 154.716 154.716 mp2_main 1 3.0 0.000 0.000 131.617 131.617 mp2_gpw_main 1 4.0 0.000 0.000 130.270 130.270 rpa_ri_compute_en 1 5.0 0.000 0.000 123.707 123.707 rpa_num_int 1 6.0 0.001 0.001 123.701 123.701 compute_mat_P_omega 1 7.0 0.003 0.003 75.390 75.390 compute_mat_P_omega_contract 10 8.0 9.249 9.249 75.145 75.145 dbt_total 2336 9.6 0.012 0.012 61.529 61.529 dbt_contract 787 11.0 0.036 0.036 55.082 55.082 dbt_tas_total 1149 12.2 0.280 0.280 53.839 53.839 dbt_tas_multiply 807 12.1 0.002 0.002 52.523 52.523 dbt_tas_dbm 807 14.1 0.003 0.003 45.348 45.348 dbm_multiply 807 16.1 45.339 45.339 45.339 45.339 dbt_tas_mm_1N 524 15.1 0.002 0.002 32.830 32.830 GW_matrix_operations 10 7.0 0.004 0.004 30.835 30.835 cp_fm_cholesky_invert 10 8.0 30.040 30.040 30.040 30.040 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 26.813 26.813 scf_env_do_scf 1 3.0 0.000 0.000 22.818 22.818 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 22.818 22.818 compute_mat_P_omega_calc_M_occ 250 9.0 9.266 9.266 22.126 22.126 qs_scf_new_mos 17 5.0 0.000 0.000 21.248 21.248 eigensolver 18 5.9 0.001 0.001 19.898 19.898 cp_fm_diag_elpa 18 6.9 0.000 0.000 12.958 12.958 cp_fm_diag_elpa_base 18 7.9 12.913 12.913 12.958 12.958 cp_fm_cholesky_decompose 14 8.1 11.739 11.739 11.739 11.739 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.727 11.727 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 10.655 10.655 dbt_tas_mm_2 251 15.0 0.001 0.001 10.519 10.519 RPA_postprocessing_nokp 10 8.0 0.001 0.001 9.767 9.767 cp_fm_cholesky_restore 51 7.0 6.887 6.887 6.887 6.887 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 6.556 6.556 compute_QP_energies 1 7.0 0.000 0.000 5.691 5.691 compute_self_energy_cubic_gw 1 8.0 0.046 0.046 5.690 5.690 dbt_copy 1103 10.7 0.092 0.092 5.112 5.112 get_2c_integrals 1 6.0 0.000 0.000 4.988 4.988 contract_cubic_gw 21 9.0 0.000 0.000 4.622 4.622 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.022 31.565 31.575 qs_energies 1 2.0 0.000 0.000 31.489 31.490 mp2_main 1 3.0 0.000 0.000 30.546 30.547 mp2_gpw_main 1 4.0 0.000 0.002 30.510 30.511 rpa_ri_compute_en 1 5.0 0.000 0.000 29.291 29.291 rpa_num_int 1 6.0 0.001 0.008 29.290 29.291 dbt_total 2336 9.6 0.010 0.012 25.906 25.924 compute_mat_P_omega 1 7.0 0.001 0.005 24.578 24.604 compute_mat_P_omega_contract 10 8.0 0.365 0.398 24.408 24.413 dbt_contract 787 11.0 0.024 0.026 19.582 19.590 dbt_tas_total 1149 12.2 0.046 0.054 17.581 17.581 dbt_tas_multiply 807 12.1 0.002 0.002 17.524 17.526 dbt_tas_dbm 807 14.1 0.003 0.003 13.076 13.100 dbm_multiply 807 16.1 9.965 10.633 9.965 10.633 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.314 7.314 compute_mat_P_omega_calc_M_occ 250 9.0 0.360 0.400 7.093 7.093 mp_sync 8706 11.6 5.253 6.857 5.253 6.857 dbt_tas_mm_2 251 15.0 0.001 0.001 6.045 6.047 dbt_copy 1111 10.7 0.010 0.011 5.333 5.679 dbt_reshape 1098 11.7 1.943 2.473 5.079 5.410 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.293 5.298 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.579 5.155 compute_QP_energies 1 7.0 0.000 0.000 3.018 3.020 compute_self_energy_cubic_gw 1 8.0 0.002 0.003 3.014 3.018 mp_waitall_2 3776 15.3 2.526 2.704 2.526 2.704 dbt_communicate_buffer 1098 12.7 0.052 0.071 2.568 2.692 contract_cubic_gw 21 9.0 0.000 0.000 2.417 2.418 dbt_reserve_blocks_index_array 2791 12.2 0.008 0.010 1.370 1.674 dbt_reserve_blocks_index 2849 13.1 0.061 0.076 1.370 1.673 dbt_tas_reserve_blocks_index 3300 14.5 0.111 0.145 1.340 1.641 dbm_reserve_blocks 3696 15.4 1.317 1.627 1.317 1.627 dbt_crop 1042 12.0 0.841 1.141 1.290 1.595 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.216 1.218 dbt_tas_replicate 396 14.1 0.527 0.681 1.086 1.179 cp_gemm 105 8.4 0.000 0.000 0.971 0.979 cp_gemm_cosma 105 9.4 0.971 0.979 0.971 0.979 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 0.966 0.969 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 0.945 0.952 scf_env_do_scf 1 3.0 0.000 0.000 0.905 0.905 scf_env_do_scf_inner_loop 17 4.0 0.000 0.001 0.905 0.905 convert_to_new_pgrid 2421 14.1 0.022 0.027 0.705 0.867 dbm_copy 1608 15.1 0.678 0.838 0.678 0.838 mp_max_i 1992 9.8 0.649 0.810 0.649 0.810 GW_matrix_operations 10 7.0 0.001 0.001 0.766 0.774 dbm_add 807 14.1 0.569 0.684 0.569 0.684 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 0.654 0.654 dbt_tas_mm_3N 22 15.1 0.000 0.000 0.500 0.643 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=45.79599999999999, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=45.339, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=30.04, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=12.913, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=11.739, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=9.266, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=10.201000000000004, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=9.965, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.36, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.526, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=1.943, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.253, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.317, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 337.151 337.151 qs_forces 1 2.0 0.000 0.000 336.534 336.534 rebuild_ks_matrix 7 6.6 0.000 0.000 316.103 316.103 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 316.103 316.103 hfx_ks_matrix 7 8.6 0.000 0.000 314.073 314.073 hfx_ri_update_ks 7 9.6 0.000 0.000 265.823 265.823 hfx_ri_update_ks_Pmat 7 10.6 34.663 34.663 265.821 265.821 dbt_total 4939 11.6 0.028 0.028 256.696 256.696 qs_energies 1 3.0 0.000 0.000 254.375 254.375 scf_env_do_scf 1 4.0 0.000 0.000 253.894 253.894 qs_ks_update_qs_env 8 6.0 0.000 0.000 233.995 233.995 dbt_tas_total 2391 14.1 0.675 0.675 229.256 229.256 dbt_contract 1473 13.0 0.125 0.125 210.995 210.995 dbt_tas_multiply 1482 14.0 0.004 0.004 200.605 200.605 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.004 182.797 182.797 dbt_tas_dbm 1482 16.0 0.006 0.006 181.856 181.856 dbm_multiply 1482 18.0 181.838 181.838 181.838 181.838 dbt_tas_mm_2 649 17.1 0.004 0.004 153.811 153.811 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 144.813 144.813 init_scf_loop 2 5.0 0.000 0.000 109.079 109.079 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 82.112 82.112 hfx_ri_update_forces 1 7.0 0.000 0.000 48.247 48.247 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 32.937 32.937 dbt_tas_mm_3T 659 17.1 0.002 0.002 22.037 22.037 dbt_tas_reshape 906 14.4 0.018 0.018 20.636 20.636 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 20.153 20.153 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.002 19.399 19.399 prepare_preconditioner 2 6.0 0.000 0.000 17.419 17.419 dbt_copy 2411 12.3 0.246 0.246 17.008 17.008 make_preconditioner 2 7.0 0.000 0.000 16.909 16.909 cp_fm_syevd 12 10.7 0.000 0.000 16.821 16.821 cp_fm_syevd_base 12 11.7 16.821 16.821 16.821 16.821 make_full_all 2 8.0 0.000 0.000 16.424 16.424 dbt_tas_merge 649 14.1 12.426 12.426 13.345 13.345 precalc_derivatives 1 8.0 0.005 0.005 12.529 12.529 dbt_tas_reshape_buffer_fill 906 15.4 12.216 12.216 12.216 12.216 dbm_reserve_blocks 8383 16.8 10.781 10.781 10.781 10.781 dbt_tas_reserve_blocks_index 7477 16.0 0.354 0.354 10.400 10.400 dbt_crop 2763 14.2 6.877 6.877 10.128 10.128 dbt_reshape 856 13.9 5.379 5.379 9.525 9.525 hfx_ri_pre_scf_Pmat_2c 1 13.0 0.000 0.000 8.479 8.479 dbt_reserve_blocks_index 4998 15.2 0.132 0.132 8.122 8.122 dbt_reserve_blocks_index_array 4963 14.3 0.019 0.019 8.061 8.061 build_3c_derivatives 9 9.0 1.955 1.955 7.099 7.099 dbt_tas_reshape_buffer_obtain 906 15.4 6.330 6.330 7.010 7.010 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.020 61.535 61.544 qs_forces 1 2.0 0.000 0.000 61.345 61.345 rebuild_ks_matrix 7 6.6 0.000 0.000 60.663 60.664 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 60.663 60.664 hfx_ks_matrix 7 8.6 0.000 0.000 59.602 59.610 dbt_total 4939 11.6 0.027 0.032 54.103 54.129 dbt_contract 1473 13.0 0.092 0.108 41.894 41.910 hfx_ri_update_ks 7 9.6 0.000 0.000 41.803 41.803 hfx_ri_update_ks_Pmat 7 10.6 1.423 1.892 41.802 41.802 dbt_tas_total 2391 14.1 0.116 0.137 39.769 39.770 qs_energies 1 3.0 0.000 0.000 38.231 38.231 scf_env_do_scf 1 4.0 0.000 0.001 38.090 38.090 qs_ks_update_qs_env 8 6.0 0.000 0.000 37.568 37.569 dbt_tas_multiply 1482 14.0 0.005 0.005 35.373 35.382 dbt_tas_dbm 1482 16.0 0.005 0.005 27.486 27.529 hfx_ri_update_ks_Pmat_KS 567 11.6 0.004 0.005 23.901 23.902 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 23.097 23.097 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 22.863 22.863 dbm_multiply 1482 18.0 17.640 21.865 17.640 21.865 hfx_ri_update_forces 1 7.0 0.000 0.000 17.798 17.807 dbt_tas_mm_2 649 17.1 0.003 0.004 17.231 17.270 mp_sync 17669 13.5 14.156 16.837 14.156 16.837 init_scf_loop 2 5.0 0.000 0.000 15.227 15.227 hfx_ri_forces_Pmat_3c 1 8.0 0.002 0.002 12.305 12.316 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.002 0.003 8.212 8.213 dbt_copy 2429 12.3 0.030 0.033 6.603 7.044 dbt_reshape 1257 13.5 2.121 2.612 4.545 4.819 dbt_tas_mm_3T 659 17.1 0.002 0.002 4.361 4.785 dbt_crop 2763 14.2 3.013 3.830 3.723 4.555 precalc_derivatives 1 8.0 0.001 0.002 4.234 4.234 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 4.039 4.039 dbt_tas_mm_3N 163 16.5 0.000 0.001 3.794 3.926 mp_waitall_2 5874 16.6 3.098 3.373 3.098 3.373 dbt_tas_merge 649 14.1 1.609 2.444 2.868 3.363 dbm_reserve_blocks 8417 16.9 2.170 2.562 2.170 2.562 dbt_tas_reserve_blocks_index 7508 16.1 0.254 0.329 2.078 2.496 mp_max_i 3372 12.5 2.042 2.447 2.042 2.447 dbt_tas_communicate_buffer 1825 16.3 0.059 0.074 2.183 2.432 dbt_tas_replicate 909 15.6 0.606 0.733 2.293 2.353 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 2.335 2.343 dbt_reserve_blocks_index 5399 15.2 0.121 0.141 1.756 2.081 dbt_reserve_blocks_index_array 5364 14.2 0.013 0.015 1.754 2.079 mp_alltoall_i 4341 15.3 1.862 2.023 1.862 2.023 build_3c_derivatives 9 9.0 0.229 0.362 2.000 2.003 dbt_tas_reshape 916 14.4 0.008 0.010 1.875 1.977 dbt_communicate_buffer 1257 14.5 0.043 0.056 1.601 1.745 convert_to_new_pgrid 4446 16.0 0.039 0.046 1.283 1.472 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.001 0.002 1.457 1.459 dbm_copy 3043 16.9 1.244 1.435 1.244 1.435 mp_sum_l 38201 15.3 1.045 1.302 1.045 1.302 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=61.52900000000005, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=181.838, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=34.663, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=16.821, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_merge", label="dbt_tas_merge", y=12.426, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=12.216, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=10.781, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=6.877, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=18.425999999999995, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=17.64, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.423, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_merge", label="dbt_tas_merge", y=1.609, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_reshape_buffer_fill", label="dbt_tas_reshape_buffer_fill", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.17, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=3.013, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=14.156, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.098, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 233.843 233.843 qs_energies 1 2.0 0.000 0.000 233.661 233.661 mp2_main 1 3.0 0.000 0.000 199.674 199.674 mp2_gpw_main 1 4.0 0.001 0.001 195.807 195.807 mp2_ri_gpw_compute_in 1 5.0 0.374 0.374 143.372 143.372 mp2_ri_gpw_compute_in_loop 1 6.0 0.009 0.009 118.505 118.505 mp2_eri_3c_integrate_gpw 2656 7.0 0.012 0.012 90.044 90.044 integrate_v_rspace 2666 8.0 0.707 0.707 76.669 76.669 grid_integrate_task_list 2666 9.0 73.918 73.918 73.918 73.918 mp2_ri_gpw_compute_en 1 5.0 0.076 0.076 52.414 52.414 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.779 9.779 50.639 50.639 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.114 2.114 33.913 33.913 scf_env_do_scf 1 3.0 0.000 0.000 33.008 33.008 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 33.008 33.008 offload_gemm 2080 8.0 31.799 31.799 31.799 31.799 qs_scf_new_mos 10 5.0 0.000 0.000 31.672 31.672 get_2c_integrals 1 6.0 0.000 0.000 24.363 24.363 eigensolver 11 5.8 0.001 0.001 24.047 24.047 calculate_wavefunction 5312 9.0 15.876 15.876 23.279 23.279 dbcsr_multiply_generic 5322 8.0 0.176 0.176 22.323 22.323 ao_to_mo_and_store_B_mult_1 2656 7.0 0.009 0.009 22.292 22.292 cp_fm_diag_elpa 11 6.8 0.000 0.000 21.722 21.722 cp_fm_diag_elpa_base 11 7.8 21.574 21.574 21.722 21.722 compute_2c_integrals 1 7.0 0.006 0.006 18.006 18.006 compute_2c_integrals_loop_lm 1 8.0 0.012 0.012 17.987 17.987 mp2_eri_2c_integrate_gpw 1 9.0 3.356 3.356 17.975 17.975 pw_transfer 63872 10.6 0.877 0.877 11.608 11.608 multiply_cannon 5322 9.0 0.445 0.445 11.444 11.444 fft_wrap_pw1pw2 53228 11.4 0.103 0.103 10.445 10.445 multiply_cannon_loop 5322 10.0 0.275 0.275 10.068 10.068 qs_diis_b_step 9 6.0 0.000 0.000 9.365 9.365 make_m2s 10644 9.0 0.060 0.060 8.502 8.502 multiply_cannon_multrec 5322 11.0 8.311 8.311 8.348 8.348 cp_fm_symm 18 7.0 8.229 8.229 8.229 8.229 make_images 10644 10.0 3.231 3.231 8.201 8.201 fft_wrap_pw1pw2_20 21271 12.4 0.569 0.569 7.367 7.367 fft3d_s 53229 13.4 6.656 6.656 6.689 6.689 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.169 2.169 6.064 6.064 cp_fm_triangular_invert 2 6.0 5.443 5.443 5.443 5.443 mp2_ri_gpw_compute_en_ener 2080 7.0 5.088 5.088 5.088 5.088 copy_dbcsr_to_fm 2679 8.0 0.025 0.025 4.758 4.758 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.021 43.328 43.339 qs_energies 1 2.0 0.000 0.000 43.259 43.259 mp2_main 1 3.0 0.000 0.001 41.017 41.017 mp2_gpw_main 1 4.0 0.001 0.002 40.890 40.890 mp2_ri_gpw_compute_in 1 5.0 0.043 0.044 18.770 24.513 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 17.004 22.746 mp2_ri_gpw_compute_en 1 5.0 0.155 0.163 22.039 22.581 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 14.899 20.634 integrate_v_rspace 93 8.1 0.108 0.116 14.806 20.415 grid_integrate_task_list 93 9.1 14.430 20.094 14.430 20.094 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.856 1.006 15.245 15.262 mp2_ri_gpw_compute_en_expansio 65 7.0 0.086 0.097 11.342 11.539 offload_gemm 65 8.0 11.255 11.453 11.255 11.453 mp_min_d 2 7.0 5.802 6.327 5.802 6.327 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 5.741 6.284 mp2_ri_gpw_compute_en_comm 17 7.0 0.122 0.150 2.648 3.244 mp_sendrecv_dm3 510 8.0 1.961 2.653 1.961 2.653 scf_env_do_scf 1 3.0 0.000 0.000 2.115 2.116 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 2.115 2.116 dbcsr_multiply_generic 176 8.0 0.008 0.009 1.798 1.952 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.781 1.939 get_2c_integrals 1 6.0 0.000 0.001 1.700 1.728 compute_2c_integrals 1 7.0 0.002 0.004 1.401 1.417 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 1.029 1.335 mp2_eri_2c_integrate_gpw 1 9.0 0.213 0.337 1.027 1.335 calculate_wavefunction 166 9.0 0.513 0.731 0.890 1.151 multiply_cannon 176 9.0 0.015 0.017 1.069 1.144 qs_scf_new_mos 10 5.0 0.000 0.000 1.034 1.097 multiply_cannon_loop 176 10.0 0.002 0.002 1.012 1.084 eigensolver 11 5.8 0.001 0.001 1.027 1.029 multiply_cannon_multrec 246 11.0 0.877 0.928 0.882 0.933 pw_transfer 2120 10.5 0.040 0.051 0.794 0.880 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.865 0.869 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=72.58599999999998, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=73.918, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=31.799, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=21.574, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=15.876, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.779, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.311, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.634, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.43, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=11.255, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.513, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.856, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.877, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=5.802, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.961, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.080 0.080 215.566 215.566 qs_energies 1 2.0 0.000 0.000 214.170 214.170 scf_env_do_scf 1 3.0 0.000 0.000 205.303 205.303 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 205.303 205.303 qs_scf_new_mos 15 5.0 0.000 0.000 128.845 128.845 eigensolver 15 6.0 0.001 0.001 121.276 121.276 cp_fm_diag_elpa 15 7.0 0.000 0.000 107.851 107.851 cp_fm_diag_elpa_base 15 8.0 105.315 105.315 107.851 107.851 qs_ks_update_qs_env 15 5.0 0.000 0.000 50.387 50.387 rebuild_ks_matrix 15 6.0 0.000 0.000 50.177 50.177 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 50.177 50.177 qs_vxc_create 15 8.0 0.013 0.013 34.064 34.064 calculate_dispersion_nonloc 15 9.0 7.162 7.162 29.708 29.708 pw_transfer 1191 10.0 0.054 0.054 22.905 22.905 fft_wrap_pw1pw2 1086 11.0 0.009 0.009 22.720 22.720 qs_rho_update_rho 16 5.0 0.000 0.000 22.705 22.705 calculate_rho_elec 16 6.0 0.218 0.218 22.705 22.705 grid_collocate_task_list 16 7.0 21.383 21.383 21.383 21.383 fft_wrap_pw1pw2_150 765 12.0 3.187 3.187 16.239 16.239 sum_up_and_integrate 15 8.0 0.049 0.049 14.759 14.759 integrate_v_rspace 15 9.0 0.020 0.020 14.711 14.711 grid_integrate_task_list 15 10.0 14.191 14.191 14.191 14.191 cp_fm_cholesky_restore 45 7.0 11.194 11.194 11.194 11.194 fft3d_s 1087 13.0 10.639 10.639 10.669 10.669 pw_scatter_s 585 13.1 6.939 6.939 6.939 6.939 fft_wrap_pw1pw2_200 197 12.3 0.704 0.704 6.290 6.290 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.401 5.401 dbcsr_complete_redistribute 46 8.3 2.200 2.200 5.370 5.370 init_scf_run 1 3.0 0.000 0.000 5.296 5.296 cp_fm_upper_to_full 30 8.0 4.765 4.765 4.765 4.765 gspace_mixing 14 5.0 0.171 0.171 4.628 4.628 vdW_energy 15 10.0 4.517 4.517 4.517 4.517 xc_vxc_pw_create 15 9.0 0.237 0.237 4.344 4.344 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.028 64.331 64.343 qs_energies 1 2.0 0.000 0.001 64.079 64.085 scf_env_do_scf 1 3.0 0.000 0.001 59.949 59.950 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 59.949 59.949 qs_ks_update_qs_env 15 5.0 0.000 0.000 25.327 25.346 rebuild_ks_matrix 15 6.0 0.000 0.000 25.292 25.311 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.003 25.292 25.311 qs_rho_update_rho 16 5.0 0.000 0.000 21.623 21.628 calculate_rho_elec 16 6.0 0.007 0.007 21.623 21.628 grid_collocate_task_list 16 7.0 19.739 20.179 19.739 20.179 sum_up_and_integrate 15 8.0 0.006 0.009 14.508 14.561 integrate_v_rspace 15 9.0 0.001 0.001 14.501 14.558 grid_integrate_task_list 15 10.0 13.429 13.892 13.429 13.892 qs_scf_new_mos 15 5.0 0.000 0.000 13.615 13.815 eigensolver 15 6.0 0.001 0.002 12.598 12.635 qs_vxc_create 15 8.0 0.001 0.001 10.479 10.490 cp_fm_diag_elpa 15 7.0 0.000 0.000 9.275 9.282 cp_fm_diag_elpa_base 15 8.0 9.130 9.156 9.273 9.279 calculate_dispersion_nonloc 15 9.0 0.972 1.749 8.582 8.603 pw_transfer 1191 10.0 0.080 0.094 8.085 8.205 fft_wrap_pw1pw2 1086 11.0 0.012 0.013 7.918 8.067 fft3d_ps 1086 13.0 2.469 2.684 6.268 6.627 fft_wrap_pw1pw2_150 765 12.0 0.264 0.299 5.426 5.514 mp_alltoall_z22v 1086 15.0 3.231 4.052 3.231 4.052 cp_fm_cholesky_restore 45 7.0 3.163 3.213 3.163 3.213 yz_to_x 501 13.9 0.219 0.272 2.458 2.897 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.493 2.493 fft_wrap_pw1pw2_200 197 12.3 0.178 0.206 2.359 2.426 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.150 2.375 rs_pw_transfer 158 9.4 0.001 0.002 1.847 2.102 density_rs2pw 16 7.0 0.001 0.001 1.765 1.970 xc_vxc_pw_create 15 9.0 0.015 0.019 1.897 1.932 mp_waitany 520 11.3 1.313 1.724 1.313 1.724 x_to_yz 585 14.1 0.325 0.346 1.317 1.641 rs_pw_transfer_RS2PW_200 18 8.8 0.037 0.044 0.970 1.463 vdW_energy 15 10.0 1.340 1.407 1.340 1.407 build_core_ppnl 1 5.0 1.255 1.403 1.255 1.403 init_scf_run 1 3.0 0.000 0.000 1.400 1.401 xc_pw_derive 90 11.0 0.001 0.001 1.283 1.364 scf_env_initial_rho_setup 1 4.0 0.000 0.001 1.303 1.304 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=52.84400000000002, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=105.315, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.383, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.191, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.194, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.639, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=15.639000000000003, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=9.13, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.739, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.429, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.163, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.231, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.075 0.075 319.478 319.478 qs_energies 1 2.0 0.000 0.000 319.342 319.342 ls_scf 1 3.0 0.000 0.000 318.112 318.112 ls_scf_main 1 4.0 0.002 0.002 305.781 305.781 density_matrix_trs4 11 5.0 0.012 0.012 211.487 211.487 arnoldi_extremal 12 6.1 0.000 0.000 127.957 127.957 arnoldi_normal_ev 12 7.1 0.017 0.017 127.957 127.957 build_subspace 23 8.1 0.087 0.087 126.113 126.113 dbcsr_matrix_vector_mult 652 9.0 0.214 0.214 102.239 102.239 dbcsr_matrix_vector_mult_local 652 10.0 91.378 91.378 91.383 91.383 ls_scf_dm_to_ks 11 5.0 0.000 0.000 88.968 88.968 matrix_ls_to_qs 11 6.0 0.000 0.000 85.706 85.706 dbcsr_multiply_generic 185 6.1 0.841 0.841 75.632 75.632 dbcsr_copy_into_existing 11 7.0 46.922 46.922 46.922 46.922 multiply_cannon 185 7.1 0.317 0.317 46.252 46.252 dbcsr_complete_redistribute 23 7.5 31.274 31.274 42.519 42.519 matrix_decluster 11 7.0 0.000 0.000 38.782 38.782 multiply_cannon_loop 185 8.1 0.264 0.264 33.536 33.536 make_m2s 370 7.1 0.038 0.038 24.853 24.853 multiply_cannon_multrec 185 9.1 23.817 23.817 23.957 23.957 make_images 370 8.1 10.602 10.602 23.129 23.129 dbcsr_finalize 646 7.5 0.203 0.203 15.113 15.113 dbcsr_merge_all 597 8.5 2.372 2.372 13.907 13.907 DGKS_ortho_d 702 9.1 12.680 12.680 12.682 12.682 setup_rec_index_2d 370 8.1 12.284 12.284 12.284 12.284 ls_scf_init_scf 1 4.0 0.000 0.000 11.607 11.607 ls_scf_init_matrix_S 1 5.0 0.000 0.000 11.280 11.280 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 10.626 10.626 Gram_Schmidt_ortho_d 702 9.1 10.589 10.589 10.590 10.590 tree_to_linear_d 110 9.4 10.217 10.217 10.217 10.217 dbcsr_sort_indices 1103 9.9 10.206 10.206 10.206 10.206 calculate_norms 370 9.1 9.314 9.314 9.314 9.314 quick_finalize 395 10.0 0.362 0.362 8.822 8.822 dbcsr_special_finalize 370 9.1 0.002 0.002 8.145 8.145 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.020 66.561 66.572 qs_energies 1 2.0 0.000 0.000 66.481 66.481 ls_scf 1 3.0 0.000 0.000 66.423 66.424 ls_scf_main 1 4.0 0.001 0.009 63.793 63.794 density_matrix_trs4 11 5.0 0.006 0.017 61.161 61.244 dbcsr_multiply_generic 185 6.1 0.058 0.070 57.215 57.413 multiply_cannon 185 7.1 0.032 0.034 47.574 48.618 multiply_cannon_loop 185 8.1 0.109 0.125 45.142 46.140 multiply_cannon_multrec 1480 9.1 28.014 30.876 28.279 31.138 mp_waitall_1 11936 10.3 14.569 18.168 14.569 18.168 multiply_cannon_metrocomm3 1480 9.1 0.013 0.016 8.530 13.847 make_m2s 370 7.1 0.034 0.037 6.704 6.765 make_images 370 8.1 0.636 0.721 6.575 6.638 calculate_norms 2960 9.1 4.791 6.307 4.791 6.307 multiply_cannon_metrocomm1 1480 9.1 0.007 0.009 3.374 5.468 arnoldi_extremal 12 6.1 0.000 0.003 3.308 3.318 arnoldi_normal_ev 12 7.1 0.001 0.004 3.308 3.318 build_subspace 23 8.1 0.019 0.025 3.203 3.205 make_images_data 370 9.1 0.009 0.012 2.915 3.165 dbcsr_matrix_vector_mult 652 9.0 0.009 0.045 2.247 2.940 mp_sum_l 1119 5.6 2.020 2.799 2.020 2.799 hybrid_alltoall_any 393 9.9 0.197 1.155 2.535 2.798 dbcsr_matrix_vector_mult_local 652 10.0 1.684 2.597 1.686 2.599 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.280 2.354 dbcsr_complete_redistribute 23 7.5 1.190 1.550 1.909 2.062 ls_scf_init_scf 1 4.0 0.000 0.000 2.031 2.031 matrix_ls_to_qs 11 6.0 0.000 0.000 1.844 2.018 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.005 2.010 matrix_decluster 11 7.0 0.000 0.000 1.706 1.845 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.832 1.834 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.164 1.827 make_images_pack 370 9.1 1.600 1.804 1.603 1.808 buffer_matrices_ensure_size 370 8.1 1.303 1.478 1.303 1.478 dbcsr_finalize 646 7.5 0.008 0.012 1.280 1.411 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=104.09299999999999, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=91.378, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=46.922, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=31.274, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=23.817, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="DGKS_ortho_d", label="DGKS_ortho_d", y=12.68, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.314, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=14.293, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.684, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.19, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=28.014, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="DGKS_ortho_d", label="DGKS_ortho_d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.791, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.02, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=14.569, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 70.347 70.347 lib_test 1 2.0 0.000 0.000 70.340 70.340 dbcsr_run_tests 3 3.0 0.002 0.002 70.340 70.340 test_multiplies_multiproc 3 4.0 0.001 0.001 54.651 54.651 dbcsr_redistribute 9 5.0 34.819 34.819 36.368 36.368 dbcsr_multiply_generic 9 5.0 0.002 0.002 16.618 16.618 dbcsr_make_random_matrix 9 4.0 12.662 12.662 15.586 15.586 multiply_cannon 9 6.0 0.001 0.001 11.981 11.981 multiply_cannon_loop 9 7.0 0.046 0.046 11.621 11.621 multiply_cannon_multrec 9 8.0 11.574 11.574 11.575 11.575 dbcsr_finalize 27 5.7 0.048 0.048 5.487 5.487 dbcsr_merge_all 18 6.5 1.996 1.996 4.749 4.749 dbcsr_data_release 975 7.6 2.643 2.643 2.643 2.643 tree_to_linear_d 9 7.0 1.891 1.891 1.891 1.891 make_m2s 18 6.0 0.001 0.001 1.516 1.516 make_images 18 7.0 0.520 0.520 1.468 1.468 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.010 18.343 18.348 lib_test 1 2.0 0.000 0.000 18.316 18.334 dbcsr_run_tests 3 3.0 0.000 0.001 18.316 18.333 test_multiplies_multiproc 3 4.0 0.000 0.002 17.481 17.545 dbcsr_multiply_generic 9 5.0 0.001 0.001 15.681 15.765 multiply_cannon 9 6.0 0.001 0.002 13.938 14.295 multiply_cannon_loop 9 7.0 0.002 0.002 13.657 14.034 multiply_cannon_multrec 72 8.0 11.351 12.021 11.351 12.022 mp_waitall_1 576 9.2 2.626 3.507 2.626 3.507 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 2.056 2.990 dbcsr_make_random_matrix 9 4.0 0.662 0.916 0.792 1.002 mp_sum_l 390 2.5 0.487 0.808 0.487 0.808 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.483 0.803 dbcsr_data_release 444 7.6 0.636 0.741 0.636 0.741 make_m2s 18 6.0 0.001 0.001 0.683 0.725 make_images 18 7.0 0.021 0.026 0.679 0.722 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.242 0.670 dbcsr_finalize 27 5.7 0.000 0.000 0.594 0.666 dbcsr_destroy 111 5.9 0.000 0.000 0.527 0.593 dbcsr_merge_all 18 6.5 0.088 0.109 0.483 0.576 dbcsr_checksum 6 5.0 0.158 0.537 0.540 0.540 make_images_data 18 8.0 0.000 0.001 0.360 0.434 dbcsr_redistribute 9 5.0 0.229 0.276 0.396 0.425 mp_sum_d 191 1.2 0.383 0.401 0.383 0.401 hybrid_alltoall_any 18 9.0 0.028 0.145 0.316 0.379 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.652999999999992, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=34.819, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.662, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.574, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.643, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=1.996, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.264000000000003, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.229, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.662, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.351, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.636, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.088, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.626, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.487, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.042 0.042 144.044 144.044 qs_mol_dyn_low 1 2.0 0.003 0.003 142.553 142.553 velocity_verlet 5 3.0 0.003 0.003 117.034 117.034 qmmm_el_coupling 6 3.8 0.000 0.000 90.866 90.866 qmmm_elec_with_gaussian 6 4.8 0.081 0.081 90.855 90.855 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 90.139 90.139 qmmm_elec_gaussian_low_G 6 6.8 89.220 89.220 89.220 89.220 qs_forces 6 3.8 0.001 0.001 42.975 42.975 qs_energies 6 4.8 0.000 0.000 38.707 38.707 scf_env_do_scf 6 5.8 0.001 0.001 35.409 35.409 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 25.847 25.847 rebuild_ks_matrix 45 8.4 0.000 0.000 24.919 24.919 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 24.919 24.919 qs_ks_update_qs_env 45 7.8 0.000 0.000 21.141 21.141 pw_transfer 966 12.3 0.048 0.048 16.907 16.907 fft_wrap_pw1pw2 801 13.6 0.006 0.006 16.671 16.671 fft_wrap_pw1pw2_150 507 15.2 2.075 2.075 16.213 16.213 qs_vxc_create 45 10.4 0.001 0.001 13.324 13.324 xc_vxc_pw_create 45 11.4 0.644 0.644 13.324 13.324 init_scf_loop 6 6.8 0.000 0.000 9.557 9.557 xc_pw_derive 270 13.4 0.002 0.002 9.199 9.199 fft3d_s 802 15.6 7.647 7.647 7.655 7.655 qs_rho_update_rho 45 7.9 0.000 0.000 7.211 7.211 calculate_rho_elec 45 8.9 0.563 0.563 7.210 7.210 xc_rho_set_and_dset_create 45 12.4 0.668 0.668 7.092 7.092 prepare_preconditioner 6 7.8 0.000 0.000 6.225 6.225 make_preconditioner 6 8.8 0.000 0.000 5.841 5.841 pw_scatter_s 429 15.8 5.622 5.622 5.622 5.622 make_full_all 6 9.8 0.001 0.001 5.560 5.560 xc_pw_divergence 45 12.4 0.001 0.001 5.528 5.528 qmmm_forces 6 3.8 0.001 0.001 5.516 5.516 qmmm_forces_with_gaussian 6 4.8 0.098 0.098 5.189 5.189 pw_integral_ab 2539 7.4 4.366 4.366 4.366 4.366 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.340 4.340 qs_ks_ddapc 45 10.4 0.001 0.001 4.262 4.262 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.784 3.784 qmmm_forces_gaussian_low_G 6 6.8 3.612 3.612 3.612 3.612 grid_collocate_task_list 45 9.9 3.413 3.413 3.413 3.413 sum_up_and_integrate 45 10.4 0.123 0.123 3.257 3.257 density_rs2pw 45 9.9 0.002 0.002 3.235 3.235 integrate_v_rspace 45 11.4 0.007 0.007 3.134 3.134 cp_fm_diag_elpa 18 11.2 0.000 0.000 3.073 3.073 cp_fm_diag_elpa_base 18 12.2 3.065 3.065 3.073 3.073 pw_poisson_solve 51 9.9 1.284 1.284 3.029 3.029 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.050 59.700 59.711 qs_mol_dyn_low 1 2.0 0.003 0.005 58.479 58.537 qs_forces 6 3.8 0.000 0.001 42.189 42.189 qs_energies 6 4.8 0.000 0.001 40.265 40.265 scf_env_do_scf 6 5.8 0.000 0.001 39.267 39.268 scf_env_do_scf_inner_loop 113 6.2 0.002 0.016 37.736 37.737 rebuild_ks_matrix 119 8.1 0.000 0.000 27.872 27.881 qs_ks_build_kohn_sham_matrix 119 9.1 0.015 0.021 27.872 27.881 qs_ks_update_qs_env 119 7.3 0.001 0.001 26.243 26.251 velocity_verlet 5 3.0 0.002 0.003 24.556 24.559 pw_transfer 2446 12.3 0.177 0.210 17.693 18.215 fft_wrap_pw1pw2 2059 13.4 0.022 0.027 17.284 17.850 fft_wrap_pw1pw2_150 1321 14.9 1.260 1.446 16.569 17.059 fft3d_ps 2059 15.4 6.796 7.755 13.098 14.109 qs_vxc_create 119 10.1 0.002 0.004 14.043 14.045 xc_vxc_pw_create 119 11.1 0.150 0.213 14.040 14.043 qs_rho_update_rho 119 7.3 0.001 0.001 11.287 11.289 calculate_rho_elec 119 8.3 0.049 0.055 11.286 11.289 xc_pw_derive 714 13.1 0.008 0.010 10.561 10.883 sum_up_and_integrate 119 10.1 0.056 0.073 10.063 10.323 integrate_v_rspace 119 11.1 0.003 0.004 10.007 10.266 qmmm_forces 6 3.8 0.002 0.002 8.219 8.219 qmmm_forces_with_gaussian 6 4.8 0.325 0.389 7.547 8.134 rs_pw_transfer 988 11.5 0.011 0.015 7.822 8.098 xc_rho_set_and_dset_create 119 12.1 0.363 0.722 6.664 7.611 xc_pw_divergence 119 12.1 0.004 0.005 6.975 7.254 qmmm_el_coupling 6 3.8 0.000 0.000 7.119 7.252 qmmm_elec_with_gaussian 6 4.8 0.340 0.394 7.117 7.250 density_rs2pw 119 9.3 0.005 0.007 6.530 6.799 mp_alltoall_z22v 2059 17.4 4.894 6.590 4.894 6.590 potential_pw2rs 119 12.1 0.006 0.008 6.090 6.105 grid_collocate_task_list 119 9.3 4.577 5.134 4.577 5.134 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.993 4.241 grid_integrate_task_list 119 12.1 3.588 4.092 3.588 4.092 x_to_yz 1095 16.8 0.801 0.913 3.332 3.939 yz_to_x 964 16.0 0.551 0.706 2.914 3.897 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.409 3.639 mp_waitany 4028 12.8 2.822 3.551 2.822 3.551 qmmm_forces_gaussian_low_G 6 6.8 3.285 3.533 3.285 3.533 rs_pw_transfer_PW2RS_150 125 13.9 1.247 1.444 3.058 3.124 qmmm_elec_gaussian_low_G 6 6.8 2.817 3.038 2.817 3.038 pw_restrict_s3 18 5.8 1.396 1.713 2.543 2.856 rs_pw_transfer_RS2PW_150 125 11.2 0.931 1.150 2.491 2.773 dbcsr_multiply_generic 2588 12.3 0.057 0.070 2.212 2.312 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.127 2.261 pw_prolongate_s3 18 6.8 1.172 1.337 2.127 2.261 mp_waitall_1 188862 16.2 2.008 2.232 2.008 2.232 qs_scf_new_mos 113 7.2 0.000 0.000 2.113 2.119 qs_scf_loop_do_ot 113 8.2 0.000 0.000 2.113 2.119 ot_scf_mini 113 9.2 0.001 0.001 2.029 2.034 qs_ks_ddapc 119 10.1 0.002 0.002 1.886 1.989 mp_sum_dm3 33 5.7 1.821 1.954 1.821 1.954 pw_gather_p 964 15.0 1.312 1.691 1.312 1.691 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.642 1.642 pw_scatter_p 1095 15.8 1.563 1.603 1.563 1.603 pw_integral_ab 2761 7.7 1.120 1.262 1.480 1.591 init_scf_loop 6 6.8 0.000 0.000 1.529 1.529 mp_sum_d 5820 12.2 0.940 1.471 0.940 1.471 xc_functional_eval 238 13.1 0.003 0.004 0.733 1.371 ot_mini 113 10.2 0.000 0.001 1.259 1.265 rs_pw_transfer_PW2RS_40 119 14.1 0.233 0.276 1.101 1.211 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=30.164000000000016, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=89.22, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.647, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.622, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.366, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.612, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.413, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=32.623000000000005, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.817, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.12, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.285, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.577, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.796, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.894, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.588, yerr=0.0 Summary: Performance test took 44 minutes. Status: OK Removing intermediate container 795851dbf5c8 ---> 965488a561d4 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 1da0df202f8c Removing intermediate container 1da0df202f8c ---> f284a2e54ccf Step 42/42 : ENTRYPOINT [] ---> Running in aefdeeaf9167 Removing intermediate container aefdeeaf9167 ---> e8905a7c9c40 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built e8905a7c9c40 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-14b:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-06-21 20:01:03+00:00