StartDate: 2022-03-11 19:10:59+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: cc8ca8cdb1cc44570107781567d073e2b9763fbe CommitTime: 2022-03-11 17:12:55 +0100 CommitAuthor: Ole Schütt CommitSubject: Docker: Postpone hip/rocm regtests until container execution Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=cc8ca8cdb1cc44570107781567d073e2b9763fbe Sending build context to Docker daemon 362.3MB Step 1/41 : FROM ubuntu:20.04 20.04: Pulling from library/ubuntu 7c3b88808835: Already exists Digest: sha256:8ae9bafbb64f63a50caab98fd3a5e37b3eb837a3e0780b78e5218e63193961f9 Status: Downloaded newer image for ubuntu:20.04 ---> 2b4cba85892a Step 2/41 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 2478d1e60fc2 Step 3/41 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> f04a754c1b4e Step 4/41 : RUN ./install_requirements.sh ubuntu:20.04 ---> Using cache ---> b7c4b3bf2ba3 Step 5/41 : RUN mkdir scripts ---> Using cache ---> 0be4797ba2b9 Step 6/41 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 20eb19422415 Step 7/41 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> bf33731cd10d Step 8/41 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --dry-run ---> Using cache ---> 19b0b3c97393 Step 9/41 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 9fcf7ef497eb Step 10/41 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 5196fa37023c Step 11/41 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> dd293eb974d0 Step 12/41 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> e76392d58f21 Step 13/41 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> a0c151ba1669 Step 14/41 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 89cd0e6c3bff Step 15/41 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 3f34682c7ad5 Step 16/41 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> f53b7bbcf5ed Step 17/41 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 6059648341ff Step 18/41 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> a38ee222da95 Step 19/41 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 033717c35479 Step 20/41 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> ce105c9308f4 Step 21/41 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> da58aaf5626a Step 22/41 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 88850f0a7d75 Step 23/41 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 6180e9da3590 Step 24/41 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> b03deb66bd99 Step 25/41 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 5658a9b5e552 Step 26/41 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 618febff69fa Step 27/41 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 32bc1a768038 Step 28/41 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> a2be3c6bda6c Step 29/41 : WORKDIR /opt/cp2k ---> Using cache ---> 4e192bfbc4ea Step 30/41 : COPY ./Makefile . ---> Using cache ---> 1fb19fb3afae Step 31/41 : COPY ./src ./src ---> Using cache ---> 9fc7724d52f6 Step 32/41 : COPY ./exts ./exts ---> Using cache ---> d4aa1e8784ab Step 33/41 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> 8a4772d82bdf Step 34/41 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true )" ---> Running in 5a3029519b86 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container 5a3029519b86 ---> 4d07959d9d16 Step 35/41 : COPY ./data ./data ---> 7a54ee0ac629 Step 36/41 : COPY ./tests ./tests ---> d7a35959912b Step 37/41 : COPY ./tools/regtesting ./tools/regtesting ---> 3fdff0ddadf4 Step 38/41 : COPY ./benchmarks ./benchmarks ---> 466773b0908e Step 39/41 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 984831f7949d Step 40/41 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in e7073e67ed57 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.033 166.804 166.804 qs_mol_dyn_low 1 2.0 0.004 0.004 165.883 165.883 qs_forces 11 3.9 0.001 0.001 165.826 165.826 qs_energies 11 4.9 0.001 0.001 155.703 155.703 scf_env_do_scf 11 5.9 0.001 0.001 123.216 123.216 velocity_verlet 10 3.0 0.002 0.002 116.123 116.123 scf_env_do_scf_inner_loop 108 6.5 0.010 0.010 82.104 82.104 init_scf_loop 11 6.9 0.000 0.000 40.929 40.929 prepare_preconditioner 11 7.9 0.000 0.000 37.020 37.020 make_preconditioner 11 8.9 0.000 0.000 37.020 37.020 make_full_inverse_cholesky 11 9.9 0.000 0.000 35.098 35.098 rebuild_ks_matrix 119 8.3 0.001 0.001 32.590 32.590 qs_ks_build_kohn_sham_matrix 119 9.3 0.019 0.019 32.589 32.589 qs_ks_update_qs_env 119 7.6 0.001 0.001 30.502 30.502 qs_rho_update_rho 119 7.7 0.001 0.001 29.112 29.112 calculate_rho_elec 119 8.7 1.557 1.557 29.111 29.111 qs_scf_new_mos 108 7.5 0.001 0.001 28.569 28.569 qs_scf_loop_do_ot 108 8.5 0.001 0.001 28.568 28.568 ot_scf_mini 108 9.5 0.003 0.003 26.525 26.525 dbcsr_multiply_generic 2286 12.5 0.197 0.197 23.831 23.831 grid_collocate_task_list 119 9.7 23.007 23.007 23.007 23.007 cp_fm_cholesky_invert 11 10.9 21.807 21.807 21.807 21.807 sum_up_and_integrate 119 10.3 0.377 0.377 20.659 20.659 integrate_v_rspace 119 11.3 0.615 0.615 20.281 20.281 grid_integrate_task_list 119 12.3 17.172 17.172 17.172 17.172 init_scf_run 11 5.9 0.001 0.001 16.861 16.861 scf_env_initial_rho_setup 11 6.9 0.001 0.001 16.860 16.860 wfi_extrapolate 11 7.9 0.001 0.001 16.017 16.017 ot_mini 108 10.5 0.001 0.001 15.679 15.679 cp_gemm 81 9.0 0.000 0.000 15.527 15.527 cp_gemm_cosma 81 10.0 15.526 15.526 15.526 15.526 make_m2s 4572 13.5 0.067 0.067 13.030 13.030 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.580 10.580 qs_ot_get_derivative 108 11.5 0.002 0.002 8.302 8.302 pw_transfer 1439 11.6 0.097 0.097 7.762 7.762 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 7.446 7.446 ot_diis_step 108 11.5 0.006 0.006 7.373 7.373 cp_fm_cholesky_decompose 22 10.9 7.107 7.107 7.107 7.107 make_images 4572 14.5 2.539 2.539 6.879 6.879 qs_ot_get_p 119 10.4 0.001 0.001 6.827 6.827 dbcsr_make_dense_low 5837 15.5 0.104 0.104 6.368 6.368 dbcsr_complete_redistribute 329 12.2 3.026 3.026 6.343 6.343 fft_wrap_pw1pw2_140 487 13.2 0.580 0.580 6.300 6.300 make_dense_data 5837 16.5 5.552 5.552 6.241 6.241 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.219 6.219 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.164 6.164 apply_single 119 13.6 0.001 0.001 6.164 6.164 multiply_cannon 2286 13.5 1.109 1.109 6.097 6.097 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.999 5.999 dbcsr_make_images_dense 3978 14.8 0.027 0.027 5.664 5.664 qs_create_task_list 11 7.9 0.000 0.000 5.460 5.460 generate_qs_task_list 11 8.9 3.767 3.767 5.460 5.460 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.260 5.260 qs_ot_p2m_diag 50 11.0 0.225 0.225 5.214 5.214 dbcsr_copy 2102 12.0 0.287 0.287 5.029 5.029 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.887 4.887 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.887 4.887 dbcsr_copy_into_existing 22 7.9 4.692 4.692 4.693 4.693 cp_dbcsr_syevd 50 12.0 0.005 0.005 4.618 4.618 density_rs2pw 119 9.7 0.007 0.007 4.547 4.547 pw_poisson_solve 119 10.3 1.839 1.839 4.504 4.504 cp_fm_diag_elpa 50 13.0 0.001 0.001 4.445 4.445 cp_fm_diag_elpa_base 50 14.0 4.388 4.388 4.444 4.444 multiply_cannon_loop 2286 14.5 0.066 0.066 4.398 4.398 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.332 4.332 multiply_cannon_multrec 2286 15.5 4.256 4.256 4.331 4.331 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.001 4.001 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.901 3.901 fft3d_s 1202 14.6 3.439 3.439 3.446 3.446 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.012 74.645 74.646 qs_mol_dyn_low 1 2.0 0.005 0.006 74.514 74.520 qs_forces 11 3.9 0.002 0.002 74.448 74.449 qs_energies 11 4.9 0.001 0.001 69.497 69.500 scf_env_do_scf 11 5.9 0.001 0.001 62.725 62.727 scf_env_do_scf_inner_loop 108 6.5 0.003 0.010 58.205 58.205 velocity_verlet 10 3.0 0.002 0.003 44.428 44.430 rebuild_ks_matrix 119 8.3 0.001 0.001 28.935 28.994 qs_ks_build_kohn_sham_matrix 119 9.3 0.022 0.023 28.935 28.994 qs_ks_update_qs_env 119 7.6 0.001 0.001 25.801 25.853 sum_up_and_integrate 119 10.3 0.049 0.053 22.664 22.702 integrate_v_rspace 119 11.3 0.005 0.005 22.614 22.653 qs_rho_update_rho 119 7.7 0.001 0.001 22.425 22.444 calculate_rho_elec 119 8.7 0.048 0.050 22.424 22.443 dbcsr_multiply_generic 2286 12.5 0.137 0.141 17.755 17.860 grid_integrate_task_list 119 12.3 16.054 16.667 16.054 16.667 grid_collocate_task_list 119 9.7 15.806 16.364 15.806 16.364 qs_scf_new_mos 108 7.5 0.001 0.001 14.634 14.688 qs_scf_loop_do_ot 108 8.5 0.001 0.001 14.633 14.686 ot_scf_mini 108 9.5 0.003 0.004 13.732 13.785 multiply_cannon 2286 13.5 0.226 0.234 11.744 11.933 multiply_cannon_loop 2286 14.5 0.233 0.246 10.590 10.874 mp_waitall_1 169478 16.3 8.796 9.185 8.796 9.185 ot_mini 108 10.5 0.001 0.001 8.118 8.174 rs_pw_transfer 974 11.9 0.016 0.018 6.927 7.740 density_rs2pw 119 9.7 0.009 0.010 5.995 6.808 multiply_cannon_metrocomm3 18288 15.5 0.086 0.089 5.707 6.119 pw_transfer 1439 11.6 0.144 0.154 5.990 6.043 fft_wrap_pw1pw2 1201 12.6 0.014 0.015 5.685 5.750 potential_pw2rs 119 12.3 0.010 0.011 5.272 5.284 fft_wrap_pw1pw2_140 487 13.2 0.566 0.592 4.899 5.049 init_scf_run 11 5.9 0.000 0.002 4.664 4.665 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.664 4.664 init_scf_loop 11 6.9 0.000 0.001 4.502 4.503 fft3d_ps 1201 14.6 2.322 2.443 4.235 4.302 wfi_extrapolate 11 7.9 0.001 0.001 4.261 4.261 qs_ot_get_derivative 108 11.5 0.001 0.002 4.023 4.079 make_m2s 4572 13.5 0.077 0.080 4.010 4.069 ot_diis_step 108 11.5 0.005 0.006 4.054 4.055 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.932 3.986 apply_single 119 13.6 0.001 0.001 3.931 3.986 multiply_cannon_multrec 18288 15.5 3.638 3.769 3.656 3.787 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.356 3.364 make_images 4572 14.5 0.191 0.194 3.295 3.359 mp_waitany 9880 13.7 2.410 3.214 2.410 3.214 rs_pw_transfer_RS2PW_140 130 11.5 0.537 0.581 2.188 3.007 rs_pw_transfer_PW2RS_140 130 13.9 1.260 1.322 2.608 2.636 mp_alltoall_d11v 2130 13.8 1.582 2.158 1.582 2.158 qs_ot_get_p 119 10.4 0.001 0.001 1.933 2.011 rs_gather_matrices 119 12.3 0.133 0.146 1.235 1.854 cp_gemm 81 9.0 0.000 0.000 1.670 1.675 cp_gemm_cosma 81 10.0 1.670 1.674 1.670 1.674 make_images_data 4572 15.5 0.063 0.070 1.480 1.579 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.444 1.558 prepare_preconditioner 11 7.9 0.000 0.000 1.507 1.518 make_preconditioner 11 8.9 0.000 0.000 1.507 1.518 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=77.929, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.007, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=21.807, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=17.172, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.526, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.107, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.256, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=26.270999999999994, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.806, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=16.054, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.67, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.638, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.41, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.796, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.037 0.037 217.057 217.057 qs_mol_dyn_low 1 2.0 0.004 0.004 216.252 216.252 qs_forces 11 3.9 0.001 0.001 216.193 216.193 qs_energies 11 4.9 0.001 0.001 202.127 202.127 scf_env_do_scf 11 5.9 0.001 0.001 165.166 165.166 velocity_verlet 10 3.0 0.002 0.002 146.800 146.800 scf_env_do_scf_inner_loop 96 6.5 0.009 0.009 121.953 121.953 rebuild_ks_matrix 107 8.3 0.001 0.001 61.880 61.880 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.017 61.880 61.880 qs_ks_update_qs_env 107 7.6 0.001 0.001 55.758 55.758 qs_rho_update_rho 107 7.7 0.001 0.001 55.038 55.038 calculate_rho_elec 107 8.7 1.391 1.391 55.037 55.037 sum_up_and_integrate 107 10.3 0.341 0.341 51.181 51.181 integrate_v_rspace 107 11.3 0.553 0.553 50.840 50.840 grid_collocate_task_list 107 9.7 49.592 49.592 49.592 49.592 grid_integrate_task_list 107 12.3 48.057 48.057 48.057 48.057 init_scf_loop 11 6.9 0.000 0.000 43.001 43.001 prepare_preconditioner 11 7.9 0.000 0.000 35.653 35.653 make_preconditioner 11 8.9 0.000 0.000 35.653 35.653 make_full_inverse_cholesky 11 9.9 0.000 0.000 33.597 33.597 qs_scf_new_mos 96 7.5 0.001 0.001 23.552 23.552 qs_scf_loop_do_ot 96 8.5 0.001 0.001 23.552 23.552 ot_scf_mini 96 9.5 0.003 0.003 21.819 21.819 cp_fm_cholesky_invert 11 10.9 20.294 20.294 20.294 20.294 dbcsr_multiply_generic 1966 12.4 0.166 0.166 19.990 19.990 init_scf_run 11 5.9 0.001 0.001 19.844 19.844 scf_env_initial_rho_setup 11 6.9 0.001 0.001 19.843 19.843 wfi_extrapolate 11 7.9 0.001 0.001 18.767 18.767 cp_gemm 81 9.0 0.000 0.000 15.480 15.480 cp_gemm_cosma 81 10.0 15.479 15.479 15.479 15.479 ot_mini 96 10.5 0.001 0.001 12.630 12.630 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.847 11.847 make_m2s 3932 13.4 0.057 0.057 10.827 10.827 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.742 7.742 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.321 7.321 cp_fm_cholesky_decompose 22 10.9 7.123 7.123 7.123 7.123 pw_transfer 1295 11.6 0.086 0.086 6.931 6.931 qs_ot_get_derivative 96 11.5 0.001 0.001 6.878 6.878 qs_create_task_list 11 7.9 0.000 0.000 6.781 6.781 generate_qs_task_list 11 8.9 5.116 5.116 6.781 6.781 fft_wrap_pw1pw2 1081 12.6 0.009 0.009 6.653 6.653 dbcsr_complete_redistribute 317 12.2 3.034 3.034 6.505 6.505 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.322 6.322 make_images 3932 14.4 2.183 2.183 5.968 5.968 qs_ot_get_p 107 10.4 0.001 0.001 5.786 5.786 ot_diis_step 96 11.5 0.005 0.005 5.748 5.748 fft_wrap_pw1pw2_140 439 13.2 0.601 0.601 5.652 5.652 multiply_cannon 1966 13.4 0.917 0.917 5.246 5.246 copy_dbcsr_to_fm 147 11.2 0.003 0.003 5.222 5.222 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 5.107 5.107 calculate_w_matrix_ot 11 6.9 0.008 0.008 5.107 5.107 dbcsr_copy 1855 11.9 0.258 0.258 5.018 5.018 dbcsr_make_dense_low 4961 15.5 0.082 0.082 5.010 5.010 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.922 4.922 apply_single 107 13.6 0.000 0.000 4.922 4.922 make_dense_data 4961 16.5 4.364 4.364 4.909 4.909 dbcsr_copy_into_existing 22 7.9 4.719 4.719 4.719 4.719 dbcsr_make_images_dense 3386 14.7 0.023 0.023 4.461 4.461 qs_ot_p2m_diag 44 11.0 0.201 0.201 4.460 4.460 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.014 128.173 128.174 qs_mol_dyn_low 1 2.0 0.005 0.005 128.050 128.057 qs_forces 11 3.9 0.002 0.002 127.997 127.997 qs_energies 11 4.9 0.001 0.001 119.266 119.268 scf_env_do_scf 11 5.9 0.001 0.001 109.659 109.660 scf_env_do_scf_inner_loop 96 6.5 0.003 0.010 101.845 101.846 velocity_verlet 10 3.0 0.002 0.002 76.363 76.365 rebuild_ks_matrix 107 8.3 0.001 0.001 58.678 58.729 qs_ks_build_kohn_sham_matrix 107 9.3 0.020 0.021 58.678 58.728 sum_up_and_integrate 107 10.3 0.041 0.045 53.185 53.239 integrate_v_rspace 107 11.3 0.004 0.005 53.144 53.197 qs_ks_update_qs_env 107 7.6 0.001 0.001 51.723 51.768 qs_rho_update_rho 107 7.7 0.001 0.001 49.019 49.032 calculate_rho_elec 107 8.7 0.043 0.045 49.018 49.031 grid_integrate_task_list 107 12.3 46.419 47.577 46.419 47.577 grid_collocate_task_list 107 9.7 42.502 43.595 42.502 43.595 dbcsr_multiply_generic 1966 12.4 0.116 0.120 14.993 15.271 qs_scf_new_mos 96 7.5 0.001 0.001 12.067 12.109 qs_scf_loop_do_ot 96 8.5 0.001 0.001 12.066 12.108 ot_scf_mini 96 9.5 0.003 0.003 11.303 11.347 multiply_cannon 1966 13.4 0.192 0.197 9.979 10.262 multiply_cannon_loop 1966 14.4 0.198 0.205 9.024 9.328 rs_pw_transfer 878 11.9 0.014 0.015 6.959 8.050 init_scf_loop 11 6.9 0.001 0.001 7.797 7.798 mp_waitall_1 146670 16.2 7.431 7.726 7.431 7.726 init_scf_run 11 5.9 0.000 0.002 7.519 7.520 scf_env_initial_rho_setup 11 6.9 0.000 0.001 7.519 7.519 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.154 7.161 density_rs2pw 107 9.7 0.008 0.009 5.985 7.093 wfi_extrapolate 11 7.9 0.001 0.001 6.888 6.888 ot_mini 96 10.5 0.001 0.001 6.670 6.717 pw_transfer 1295 11.6 0.129 0.136 5.222 5.283 multiply_cannon_metrocomm3 15728 15.4 0.073 0.075 4.798 5.161 fft_wrap_pw1pw2 1081 12.6 0.013 0.014 4.956 5.016 potential_pw2rs 107 12.3 0.009 0.010 4.700 4.709 fft_wrap_pw1pw2_140 439 13.2 0.507 0.529 4.302 4.449 mp_waitany 8968 13.7 2.970 4.055 2.970 4.055 rs_pw_transfer_RS2PW_140 118 11.5 0.418 0.446 2.670 3.777 fft3d_ps 1081 14.6 2.016 2.100 3.661 3.739 mp_alltoall_d11v 1998 13.7 2.286 3.634 2.286 3.634 make_m2s 3932 13.4 0.065 0.068 3.406 3.446 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.354 3.405 apply_single 107 13.6 0.001 0.001 3.354 3.405 ot_diis_step 96 11.5 0.004 0.005 3.374 3.374 multiply_cannon_multrec 15728 15.4 3.185 3.302 3.201 3.318 qs_ot_get_derivative 96 11.5 0.001 0.001 3.265 3.310 rs_gather_matrices 107 12.3 0.119 0.130 1.972 3.297 make_images 3932 14.4 0.165 0.168 2.797 2.843 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=76.512, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=49.592, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.057, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=20.294, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.479, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.123, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=25.66600000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=42.502, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.419, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.97, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.431, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.185, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.598 0.598 292.323 292.323 qs_energies 1 2.0 0.000 0.000 290.868 290.868 scf_env_do_scf 1 3.0 0.000 0.000 288.308 288.308 qs_ks_update_qs_env 8 5.0 0.000 0.000 269.933 269.933 rebuild_ks_matrix 7 6.0 0.000 0.000 269.823 269.823 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 269.823 269.823 hfx_ks_matrix 7 8.0 0.001 0.001 178.097 178.097 integrate_four_center 7 9.0 2.453 2.453 178.063 178.063 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 169.387 169.387 integrate_four_center_main 7 10.0 1.583 1.583 164.296 164.296 integrate_four_center_bin 453 11.0 162.713 162.713 162.713 162.713 init_scf_loop 1 4.0 0.000 0.000 118.901 118.901 cp_gemm 129 10.3 0.001 0.001 75.673 75.673 cp_gemm_cosma 129 11.3 75.672 75.672 75.672 75.672 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 43.449 43.449 admm_fit_mo_coeffs 7 9.0 0.000 0.000 41.657 41.657 admm_mo_merge_derivs 7 8.0 0.000 0.000 38.931 38.931 merge_mo_derivs_diag 7 9.0 0.024 0.024 38.931 38.931 purify_mo_diag 7 10.0 0.001 0.001 24.939 24.939 fit_mo_coeffs 7 10.0 0.000 0.000 16.719 16.719 prepare_preconditioner 1 5.0 0.000 0.000 14.171 14.171 make_preconditioner 1 6.0 0.000 0.000 14.171 14.171 integrate_four_center_load 7 10.0 0.001 0.001 10.918 10.918 hfx_load_balance 1 11.0 0.002 0.002 10.917 10.917 arnoldi_normal_ev 11 9.3 0.003 0.003 8.420 8.420 estimate_cond_num 1 7.0 0.000 0.000 8.339 8.339 build_subspace 28 9.5 0.015 0.015 8.295 8.295 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.209 0.216 191.772 191.773 qs_energies 1 2.0 0.000 0.000 191.422 191.423 scf_env_do_scf 1 3.0 0.000 0.000 190.860 190.860 qs_ks_update_qs_env 8 5.0 0.000 0.000 187.857 187.857 rebuild_ks_matrix 7 6.0 0.000 0.000 187.844 187.844 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 187.844 187.844 hfx_ks_matrix 7 8.0 0.001 0.001 175.444 175.445 integrate_four_center 7 9.0 0.096 0.395 175.428 175.429 integrate_four_center_main 7 10.0 0.005 0.005 159.813 163.471 integrate_four_center_bin 448 11.0 159.808 163.466 159.808 163.466 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 110.260 110.260 init_scf_loop 1 4.0 0.000 0.000 80.598 80.599 integrate_four_center_load 7 10.0 0.000 0.000 10.912 10.915 hfx_load_balance 1 11.0 0.001 0.002 10.912 10.915 mp_sync 70 11.3 3.863 6.777 3.863 6.777 hfx_load_balance_count 1 12.0 5.275 5.457 5.275 5.457 hfx_load_balance_bin 1 12.0 5.258 5.444 5.258 5.444 cp_gemm 129 10.3 0.000 0.001 5.041 5.046 cp_gemm_cosma 129 11.3 5.040 5.046 5.040 5.046 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=49.303999999999974, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=162.713, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=75.672, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.453, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=1.583, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.598, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=12.21799999999999, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=159.808, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=5.04, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.096, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.209, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.275, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.258, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=3.863, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 397.870 397.870 qs_energies 1 2.0 0.000 0.000 397.356 397.356 mp2_main 1 3.0 0.000 0.000 390.830 390.830 mp2_gpw_main 1 4.0 0.000 0.000 390.414 390.414 rpa_ri_compute_en 1 5.0 0.000 0.000 377.079 377.079 rpa_num_int 1 6.0 0.001 0.001 377.056 377.056 compute_mat_P_omega 1 7.0 0.002 0.002 175.504 175.504 compute_mat_P_omega_contract 10 8.0 12.111 12.111 173.903 173.903 cp_gemm 105 8.4 0.000 0.000 172.080 172.080 cp_gemm_cosma 105 9.4 172.080 172.080 172.080 172.080 dbt_total 2336 9.6 0.019 0.019 167.199 167.199 GW_matrix_operations 10 7.0 0.005 0.005 120.414 120.414 dbt_contract 787 11.0 48.879 48.879 102.196 102.196 dbt_copy 1103 10.7 21.408 21.408 63.687 63.687 compute_mat_P_omega_calc_M_occ 250 9.0 12.131 12.131 62.808 62.808 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 58.541 58.541 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 56.545 56.545 dbt_tas_total 1149 12.2 0.263 0.263 49.990 49.990 dbt_tas_multiply 807 12.1 0.003 0.003 48.703 48.703 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 38.693 38.693 dbt_tas_dbm 807 14.1 0.005 0.005 38.589 38.589 dbm_multiply 807 16.1 38.577 38.577 38.577 38.577 dbt_tas_mm_1N 524 15.1 0.002 0.002 23.464 23.464 compute_QP_energies 1 7.0 0.000 0.000 20.677 20.677 compute_self_energy_cubic_gw 1 8.0 0.101 0.101 20.677 20.677 dbt_tas_copy 574 11.4 16.416 16.416 19.748 19.748 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 18.853 18.853 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 14.495 14.495 dbt_tas_mm_2 251 15.0 0.002 0.002 13.360 13.360 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 13.321 13.321 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 13.079 13.079 dbt_tas_reserve_blocks_index 3261 13.7 7.817 7.817 12.966 12.966 dbt_copy_nocomm 251 12.0 11.323 11.323 12.909 12.909 dbt_reserve_blocks_index 2280 12.5 1.630 1.630 10.898 10.898 dbt_reserve_blocks_index_array 2222 11.6 0.011 0.011 10.855 10.855 cp_fm_cholesky_invert 10 8.0 9.126 9.126 9.126 9.126 contract_cubic_gw 21 9.0 0.000 0.000 8.768 8.768 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.010 60.537 60.539 qs_energies 1 2.0 0.001 0.001 60.408 60.409 mp2_main 1 3.0 0.000 0.001 58.773 58.774 mp2_gpw_main 1 4.0 0.000 0.001 58.708 58.709 rpa_ri_compute_en 1 5.0 0.000 0.000 56.583 56.584 rpa_num_int 1 6.0 0.000 0.001 56.575 56.576 dbt_total 2336 9.6 0.020 0.022 43.019 43.023 compute_mat_P_omega 1 7.0 0.001 0.002 42.134 42.139 compute_mat_P_omega_contract 10 8.0 0.812 0.838 41.754 41.759 dbt_contract 787 11.0 2.011 2.172 31.196 31.202 dbt_tas_total 1149 12.2 0.088 0.097 27.161 27.162 dbt_tas_multiply 807 12.1 0.003 0.004 27.075 27.078 dbt_tas_dbm 807 14.1 0.006 0.006 18.714 18.716 dbm_multiply 807 16.1 14.914 15.865 14.914 15.865 compute_mat_P_omega_calc_M_occ 250 9.0 0.799 0.822 13.798 13.798 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 10.431 10.432 dbt_copy 1111 10.7 4.386 4.637 9.917 10.266 cp_gemm 105 8.4 0.000 0.000 9.828 9.843 cp_gemm_cosma 105 9.4 9.828 9.843 9.828 9.843 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 8.557 8.558 mp_sync 8706 11.6 7.242 8.398 7.242 8.398 dbt_tas_mm_2 251 15.0 0.003 0.003 8.119 8.120 dbt_tas_mm_1N 524 15.1 0.003 0.003 7.384 7.957 GW_matrix_operations 10 7.0 0.001 0.002 6.283 6.291 compute_QP_energies 1 7.0 0.000 0.000 4.263 4.263 compute_self_energy_cubic_gw 1 8.0 0.005 0.006 4.262 4.263 dbt_communicate_buffer 1098 11.7 0.100 0.107 3.991 4.149 mp_waitall_2 3776 14.7 3.819 4.145 3.819 4.145 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.449 3.449 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 3.326 3.328 contract_cubic_gw 21 9.0 0.000 0.000 3.218 3.218 dbt_reserve_blocks_index_array 2791 11.4 0.014 0.017 2.548 2.749 dbt_reserve_blocks_index 2849 12.4 0.113 0.120 2.549 2.747 dbt_tas_reserve_blocks_index 3300 13.8 0.278 0.299 2.499 2.685 dbm_reserve_blocks 3696 14.8 2.298 2.482 2.298 2.482 dbt_tas_replicate 396 14.1 1.238 1.488 2.365 2.412 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.119 2.122 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 1.925 1.931 convert_to_new_pgrid 2421 14.1 0.041 0.047 1.641 1.822 dbm_copy 1608 15.1 1.590 1.772 1.590 1.772 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.002 1.687 1.691 scf_env_do_scf 1 3.0 0.000 0.000 1.574 1.574 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 1.574 1.574 mp_max_i 1992 9.8 1.157 1.466 1.157 1.466 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=100.50999999999999, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=172.08, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_contract", label="dbt_contract", y=48.879, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=38.577, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_copy", label="dbt_copy", y=21.408, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=16.416, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=18.337000000000003, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=9.828, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_contract", label="dbt_contract", y=2.011, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=14.914, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_copy", label="dbt_copy", y=4.386, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.819, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=7.242, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.025 0.025 745.014 745.014 qs_forces 1 2.0 0.000 0.000 744.209 744.209 rebuild_ks_matrix 7 6.6 0.000 0.000 733.961 733.961 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 733.961 733.961 hfx_ks_matrix 7 8.6 0.000 0.000 730.993 730.993 dbt_total 1051 10.5 0.010 0.010 599.469 599.469 hfx_ri_update_ks 7 9.6 0.000 0.000 399.786 399.786 hfx_ri_update_ks_Pmat 7 10.6 66.717 66.717 399.780 399.780 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 382.462 382.462 dbt_contract 283 11.7 125.114 125.114 376.364 376.364 qs_energies 1 3.0 0.000 0.000 361.672 361.672 scf_env_do_scf 1 4.0 0.000 0.000 361.232 361.232 qs_ks_update_qs_env 8 6.0 0.000 0.000 351.505 351.505 hfx_ri_update_forces 1 7.0 0.023 0.023 331.200 331.200 dbt_tas_total 533 12.7 3.145 3.145 238.239 238.239 dbt_tas_multiply 292 12.8 0.001 0.001 233.449 233.449 dbt_copy 444 11.5 109.719 109.719 218.300 218.300 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 210.628 210.628 dbt_tas_dbm 292 14.8 0.002 0.002 209.653 209.653 dbm_multiply 292 16.8 209.647 209.647 209.647 209.647 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 176.148 176.148 init_scf_loop 2 5.0 0.000 0.000 150.601 150.601 dbt_tas_mm_2 119 16.0 0.001 0.001 116.893 116.893 hfx_ri_forces_Pmat_2c_inv_2 9 8.0 0.001 0.001 60.584 60.584 dbt_tas_mm_3N 94 14.7 0.000 0.000 59.209 59.209 precalc_derivatives 1 8.0 0.009 0.009 58.356 58.356 hfx_ri_forces_Pmat_metric 9 8.0 0.001 0.001 56.943 56.943 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 47.174 47.174 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 46.924 46.924 dbt_tas_reserve_blocks_index 1569 14.6 17.794 17.794 38.897 38.897 dbt_tas_copy 287 12.3 29.145 29.145 36.934 36.934 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 35.898 35.898 dbt_reserve_blocks_index 1020 13.6 3.774 3.774 34.433 34.433 dbt_reserve_blocks_index_array 999 12.6 0.008 0.008 34.115 34.115 dbt_tas_mm_3T 77 17.1 0.000 0.000 33.377 33.377 hfx_ri_forces_Pmat_PQ_der 9 8.0 1.451 1.451 31.768 31.768 hfx_ri_forces_Pmat_2c_inv_1 1 8.0 4.783 4.783 30.565 30.565 hfx_ri_forces_Pmat_Pmat_2 9 8.0 0.000 0.000 30.259 30.259 build_3c_derivatives 2 9.0 1.243 1.243 28.095 28.095 get_force_from_3c_trace 18 8.0 27.392 27.392 27.392 27.392 dbt_communicate_buffer 157 12.8 22.052 22.052 22.052 22.052 dbt_split_blocks_generic 138 11.7 17.777 17.777 21.597 21.597 dbt_split_copyback 69 11.7 17.311 17.311 19.649 19.649 dbm_reserve_blocks 1813 15.4 17.657 17.657 17.657 17.657 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.012 116.783 116.783 qs_forces 1 2.0 0.000 0.000 116.609 116.609 rebuild_ks_matrix 7 6.6 0.000 0.000 115.361 115.363 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.003 115.361 115.363 hfx_ks_matrix 7 8.6 0.001 0.001 113.485 113.486 dbt_total 1051 10.5 0.011 0.013 104.563 104.565 dbt_contract 283 11.7 5.317 5.616 82.920 82.940 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 78.098 78.098 dbt_tas_total 533 12.7 0.115 0.189 76.172 76.173 dbt_tas_multiply 292 12.8 0.001 0.002 73.009 73.010 hfx_ri_update_forces 1 7.0 0.003 0.003 72.956 72.957 dbt_tas_dbm 292 14.8 0.003 0.003 56.925 56.928 dbm_multiply 292 16.8 50.794 52.263 50.794 52.263 hfx_ri_update_ks 7 9.6 0.000 0.000 40.527 40.527 hfx_ri_update_ks_Pmat 7 10.6 2.608 2.804 40.525 40.525 qs_energies 1 3.0 0.000 0.000 38.486 38.488 scf_env_do_scf 1 4.0 0.000 0.000 38.242 38.242 qs_ks_update_qs_env 8 6.0 0.000 0.000 37.265 37.267 dbt_tas_mm_2 119 16.0 0.001 0.002 25.226 25.227 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 22.005 22.005 hfx_ri_forces_Pmat_metric 9 8.0 0.001 0.001 21.892 21.892 dbt_copy 464 11.6 6.231 6.488 17.203 17.609 init_scf_loop 2 5.0 0.000 0.000 16.236 16.237 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 15.564 15.564 dbt_tas_mm_3T 77 17.1 0.001 0.001 14.356 15.152 mp_sync 3797 12.3 10.589 14.981 10.589 14.981 dbt_tas_mm_3N 94 14.7 0.001 0.001 13.739 14.341 hfx_ri_forces_Pmat_2c_inv_2 9 8.0 0.000 0.001 13.087 13.087 hfx_ri_forces_Pmat_PQ_der 9 8.0 0.076 0.080 11.207 11.208 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 10.916 10.916 hfx_ri_forces_Pmat_Pmat_2 9 8.0 0.000 0.000 8.909 8.935 dbt_tas_reserve_blocks_index 1602 14.7 0.605 0.641 6.896 7.383 mp_waitall_2 1484 15.5 6.747 7.279 6.747 7.279 dbm_reserve_blocks 1848 15.6 6.675 7.127 6.675 7.127 precalc_derivatives 1 8.0 0.003 0.003 6.300 6.301 dbt_reserve_blocks_index 1191 13.5 0.172 0.180 5.889 6.208 dbt_reserve_blocks_index_array 1170 12.5 0.010 0.011 5.854 6.175 dbt_tas_replicate 246 14.5 1.751 2.377 5.655 5.918 hfx_ri_forces_Pmat_2c_inv_1 1 8.0 0.165 0.178 5.257 5.262 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 4.812 4.813 convert_to_new_pgrid 876 14.8 0.037 0.047 3.956 4.252 dbm_copy 578 15.8 3.886 4.181 3.886 4.181 dbt_tas_communicate_buffer 498 15.8 0.022 0.026 3.671 4.105 dbt_communicate_buffer 328 12.3 0.024 0.025 3.467 3.608 build_3c_derivatives 2 9.0 0.639 0.690 3.447 3.458 dbt_tas_replicate_communicate_ 127 15.0 0.004 0.006 2.905 3.302 hfx_ri_forces_Pmat_Pmat_1 3 8.0 0.000 0.000 2.951 2.951 dbt_tas_copy 141 13.3 1.432 1.504 2.525 2.733 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 2.602 2.612 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.001 2.344 2.345 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=187.015, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=209.647, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_contract", label="dbt_contract", y=125.114, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_copy", label="dbt_copy", y=109.719, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=66.717, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=29.145, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=17.657, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=26.39, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=50.794, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_contract", label="dbt_contract", y=5.317, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_copy", label="dbt_copy", y=6.231, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=2.608, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=1.432, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=6.675, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=6.747, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=10.589, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 258.739 258.739 qs_energies 1 2.0 0.000 0.000 258.441 258.441 mp2_main 1 3.0 0.000 0.000 244.485 244.485 mp2_gpw_main 1 4.0 0.001 0.001 243.201 243.201 mp2_ri_gpw_compute_in 1 5.0 0.834 0.834 175.576 175.576 mp2_ri_gpw_compute_in_loop 1 6.0 0.029 0.029 144.107 144.107 mp2_eri_3c_integrate_gpw 2656 7.0 0.029 0.029 104.954 104.954 integrate_v_rspace 2666 8.0 2.542 2.542 84.596 84.596 grid_integrate_task_list 2666 9.0 78.746 78.746 78.746 78.746 mp2_ri_gpw_compute_en 1 5.0 0.053 0.053 67.577 67.577 mp2_ri_gpw_compute_en_RI_loop 1 6.0 32.778 32.778 63.997 63.997 calculate_wavefunction 5312 9.0 27.927 27.927 37.315 37.315 get_2c_integrals 1 6.0 0.000 0.000 30.578 30.578 compute_2c_integrals 1 7.0 0.011 0.011 28.391 28.391 compute_2c_integrals_loop_lm 1 8.0 0.025 0.025 28.351 28.351 mp2_eri_2c_integrate_gpw 1 9.0 4.527 4.527 28.327 28.327 dbcsr_multiply_generic 5322 8.0 0.313 0.313 25.668 25.668 ao_to_mo_and_store_B_mult_1 2656 7.0 0.021 0.021 25.634 25.634 mp2_ri_gpw_compute_en_expansio 2080 7.0 3.788 3.788 20.181 20.181 pw_transfer 63872 10.6 2.060 2.060 16.632 16.632 offload_gemm 2080 8.0 16.393 16.393 16.393 16.393 fft_wrap_pw1pw2 53228 11.4 0.242 0.242 14.235 14.235 multiply_cannon 5322 9.0 2.420 2.420 14.215 14.215 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 8.404 8.404 13.320 13.320 scf_env_do_scf 1 3.0 0.000 0.000 13.222 13.222 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 13.222 13.222 qs_scf_new_mos 10 5.0 0.000 0.000 11.228 11.228 mp2_ri_gpw_compute_en_ener 2080 7.0 11.025 11.025 11.025 11.025 multiply_cannon_loop 5322 10.0 0.176 0.176 10.315 10.315 fft3d_s 53229 13.4 9.402 9.402 9.470 9.470 fft_wrap_pw1pw2_20 21271 12.4 0.572 0.572 9.153 9.153 multiply_cannon_multrec 5322 11.0 8.948 8.948 9.009 9.009 make_m2s 10644 9.0 0.095 0.095 8.049 8.049 eigensolver 11 5.8 0.001 0.001 7.747 7.747 make_images 10644 10.0 2.784 2.784 7.521 7.521 cp_fm_diag_elpa 11 6.8 0.000 0.000 7.160 7.160 cp_fm_diag_elpa_base 11 7.8 6.947 6.947 7.160 7.160 potential_pw2rs 5322 10.0 0.256 0.256 6.482 6.482 copy_dbcsr_to_fm 2679 8.0 0.050 0.050 5.819 5.819 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.008 65.763 65.764 qs_energies 1 2.0 0.000 0.001 65.654 65.655 mp2_main 1 3.0 0.000 0.000 62.367 62.367 mp2_gpw_main 1 4.0 0.001 0.001 62.194 62.194 mp2_ri_gpw_compute_in 1 5.0 0.052 0.057 34.572 35.542 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 31.803 32.775 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 28.524 29.345 integrate_v_rspace 93 8.1 0.192 0.227 28.243 29.039 grid_integrate_task_list 93 9.1 27.664 28.462 27.664 28.462 mp2_ri_gpw_compute_en 1 5.0 0.301 0.306 27.519 27.936 mp2_ri_gpw_compute_en_RI_loop 1 6.0 2.564 3.010 23.075 24.281 mp2_ri_gpw_compute_en_expansio 65 7.0 0.215 0.285 10.371 12.491 offload_gemm 65 8.0 10.155 12.276 10.155 12.276 mp2_ri_gpw_compute_en_comm 65 7.0 5.315 5.863 9.153 10.481 mp_sendrecv_dm3 390 8.0 3.838 5.375 3.838 5.375 scf_env_do_scf 1 3.0 0.000 0.000 3.095 3.096 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 3.095 3.096 dbcsr_multiply_generic 176 8.0 0.012 0.014 2.681 2.986 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.650 2.952 get_2c_integrals 1 6.0 0.000 0.000 2.597 2.617 compute_2c_integrals 1 7.0 0.005 0.007 2.218 2.233 compute_2c_integrals_loop_lm 1 8.0 0.002 0.004 2.018 2.100 mp2_eri_2c_integrate_gpw 1 9.0 0.439 0.474 2.016 2.099 mp2_ri_create_group 1 6.0 0.000 0.000 2.010 2.068 replicate_iaK_2intgroup 1 7.0 1.375 1.552 1.844 2.068 mp_sum_d 498 2.3 1.176 1.975 1.176 1.975 calculate_wavefunction 166 9.0 0.876 0.925 1.633 1.691 make_m2s 352 9.0 0.004 0.005 1.390 1.629 make_images 352 10.0 0.066 0.068 1.377 1.616 pw_transfer 2120 10.5 0.081 0.090 1.539 1.570 qs_scf_new_mos 10 5.0 0.000 0.000 1.454 1.475 eigensolver 11 5.8 0.001 0.001 1.459 1.460 fft_wrap_pw1pw2 1768 11.4 0.009 0.010 1.427 1.455 mp2_ri_get_sizes 1 6.0 0.000 0.000 0.972 1.389 mp_min_d 1 7.0 0.972 1.389 0.972 1.389 multiply_cannon 176 9.0 0.022 0.024 1.226 1.320 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=91.86999999999998, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=78.746, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=32.778, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=27.927, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=16.393, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_ener", label="mp2_ri_gpw_compute_en_ener", y=11.025, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_comm", label="mp2_ri_gpw_compute_en_comm", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=15.351000000000006, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.664, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=2.564, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.876, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=10.155, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_ener", label="mp2_ri_gpw_compute_en_ener", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_comm", label="mp2_ri_gpw_compute_en_comm", y=5.315, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=3.838, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.102 0.102 193.392 193.392 qs_energies 1 2.0 0.000 0.000 191.641 191.641 scf_env_do_scf 1 3.0 0.000 0.000 181.193 181.193 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 181.193 181.193 qs_scf_new_mos 15 5.0 0.000 0.000 79.602 79.602 qs_ks_update_qs_env 15 5.0 0.000 0.000 70.124 70.124 rebuild_ks_matrix 15 6.0 0.000 0.000 69.765 69.765 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 69.765 69.765 eigensolver 15 6.0 0.002 0.002 66.065 66.065 cp_fm_diag_elpa 15 7.0 0.000 0.000 51.928 51.928 cp_fm_diag_elpa_base 15 8.0 47.205 47.205 51.928 51.928 qs_vxc_create 15 8.0 0.038 0.038 45.506 45.506 calculate_dispersion_nonloc 15 9.0 8.981 8.981 39.635 39.635 pw_transfer 1191 9.8 0.093 0.093 26.922 26.922 fft_wrap_pw1pw2 1086 10.9 0.013 0.013 26.624 26.624 qs_rho_update_rho 16 5.0 0.000 0.000 25.424 25.424 calculate_rho_elec 16 6.0 0.343 0.343 25.423 25.423 grid_collocate_task_list 16 7.0 23.881 23.881 23.881 23.881 sum_up_and_integrate 15 8.0 0.080 0.080 22.698 22.698 integrate_v_rspace 15 9.0 0.036 0.036 22.619 22.619 grid_integrate_task_list 15 10.0 21.977 21.977 21.977 21.977 fft_wrap_pw1pw2_150 765 12.0 3.364 3.364 20.246 20.246 fft3d_s 1087 12.8 10.988 10.988 10.999 10.999 copy_dbcsr_to_fm 16 5.9 0.001 0.001 10.751 10.751 pw_scatter_s 585 13.0 10.555 10.555 10.555 10.555 cp_fm_cholesky_restore 45 7.0 9.688 9.688 9.688 9.688 dbcsr_complete_redistribute 46 8.3 3.503 3.503 9.561 9.561 cp_fm_upper_to_full 30 8.0 9.169 9.169 9.169 9.169 vdW_energy 15 10.0 8.039 8.039 8.039 8.039 gspace_mixing 14 5.0 0.274 0.274 7.424 7.424 broyden_mixing 14 6.0 6.686 6.686 6.686 6.686 fft_wrap_pw1pw2_200 197 11.5 0.345 0.345 6.121 6.121 xc_vxc_pw_create 15 9.0 1.526 1.526 5.833 5.833 init_scf_run 1 3.0 0.000 0.000 4.874 4.874 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.783 4.783 dbcsr_finalize 159 9.9 0.021 0.021 4.140 4.140 dbcsr_merge_all 91 11.1 0.078 0.078 3.983 3.983 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.016 87.521 87.522 qs_energies 1 2.0 0.000 0.001 87.137 87.138 scf_env_do_scf 1 3.0 0.000 0.000 81.963 81.964 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 81.963 81.963 qs_ks_update_qs_env 15 5.0 0.000 0.000 40.086 40.098 rebuild_ks_matrix 15 6.0 0.000 0.000 40.037 40.049 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.005 40.037 40.049 qs_rho_update_rho 16 5.0 0.000 0.000 24.064 24.067 calculate_rho_elec 16 6.0 0.011 0.012 24.064 24.067 sum_up_and_integrate 15 8.0 0.014 0.016 23.435 23.460 integrate_v_rspace 15 9.0 0.001 0.001 23.422 23.445 grid_collocate_task_list 16 7.0 22.192 22.872 22.192 22.872 grid_integrate_task_list 15 10.0 21.619 22.259 21.619 22.259 qs_scf_new_mos 15 5.0 0.001 0.001 18.264 18.291 eigensolver 15 6.0 0.002 0.002 16.755 16.765 qs_vxc_create 15 8.0 0.001 0.001 16.055 16.067 calculate_dispersion_nonloc 15 9.0 1.417 1.436 13.052 13.068 pw_transfer 1191 9.8 0.138 0.146 12.336 12.443 cp_fm_diag_elpa 15 7.0 0.000 0.000 12.213 12.221 cp_fm_diag_elpa_base 15 8.0 11.952 11.990 12.207 12.210 fft_wrap_pw1pw2 1086 10.9 0.021 0.024 12.032 12.155 fft3d_ps 1086 12.9 5.297 5.441 9.103 9.281 fft_wrap_pw1pw2_150 765 12.0 0.704 0.743 8.057 8.085 cp_fm_cholesky_restore 45 7.0 4.294 4.353 4.294 4.353 fft_wrap_pw1pw2_200 197 11.5 0.381 0.405 3.810 3.921 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.196 3.196 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.745 3.023 xc_vxc_pw_create 15 9.0 0.059 0.080 3.002 3.020 mp_alltoall_z22v 1086 14.9 2.288 2.609 2.288 2.609 x_to_yz 585 14.0 0.936 0.969 2.169 2.236 vdW_energy 15 10.0 2.113 2.208 2.113 2.208 rs_pw_transfer 158 9.4 0.002 0.003 1.751 2.130 density_rs2pw 16 7.0 0.002 0.002 1.697 2.020 build_core_ppnl 1 5.0 1.828 2.001 1.828 2.001 yz_to_x 501 13.7 0.549 0.633 1.605 1.798 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=69.098, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=47.205, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.881, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.977, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.988, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.555, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=9.688, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=22.167, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=11.952, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.192, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.619, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.294, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=5.297, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.095 0.095 334.002 334.002 qs_energies 1 2.0 0.000 0.000 333.825 333.825 ls_scf 1 3.0 0.000 0.000 331.937 331.937 ls_scf_main 1 4.0 0.002 0.002 317.291 317.291 density_matrix_trs4 11 5.0 0.011 0.011 179.162 179.162 ls_scf_dm_to_ks 11 5.0 0.000 0.000 130.770 130.770 matrix_ls_to_qs 11 6.0 0.000 0.000 126.238 126.238 dbcsr_multiply_generic 185 6.1 0.637 0.637 113.681 113.681 multiply_cannon 185 7.1 2.889 2.889 75.711 75.711 dbcsr_copy_into_existing 11 7.0 71.731 71.731 71.731 71.731 dbcsr_complete_redistribute 23 7.5 43.695 43.695 59.737 59.737 matrix_decluster 11 7.0 0.000 0.000 54.506 54.506 multiply_cannon_loop 185 8.1 0.420 0.420 54.035 54.035 multiply_cannon_multrec 185 9.1 51.750 51.750 51.804 51.804 arnoldi_extremal 12 6.1 0.000 0.000 45.823 45.823 arnoldi_normal_ev 12 7.1 0.027 0.027 45.822 45.822 build_subspace 23 8.1 0.133 0.133 45.180 45.180 dbcsr_matrix_vector_mult 652 9.0 0.272 0.272 34.686 34.686 dbcsr_matrix_vector_mult_local 652 10.0 33.074 33.074 33.083 33.083 make_m2s 370 7.1 0.031 0.031 31.270 31.270 make_images 370 8.1 7.611 7.611 28.702 28.702 dbcsr_finalize 646 7.5 0.233 0.233 21.555 21.555 dbcsr_merge_all 597 8.5 3.443 3.443 19.510 19.510 setup_rec_index_2d 370 8.1 18.634 18.634 18.634 18.634 dbcsr_sort_indices 1103 9.9 17.226 17.226 17.226 17.226 quick_finalize 395 10.0 0.521 0.521 14.701 14.701 tree_to_linear_d 110 9.4 13.742 13.742 13.742 13.742 ls_scf_init_scf 1 4.0 0.000 0.000 13.637 13.637 dbcsr_special_finalize 370 9.1 0.003 0.003 13.551 13.551 ls_scf_init_matrix_S 1 5.0 0.000 0.000 13.109 13.109 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 12.227 12.227 dbcsr_dot_sd 144 6.3 9.276 9.276 9.277 9.277 dbcsr_frobenius_norm 142 6.1 7.787 7.787 7.789 7.789 matrix_qs_to_ls 12 5.1 0.000 0.000 7.634 7.634 matrix_cluster 12 6.1 0.000 0.000 7.634 7.634 make_images_data 370 9.1 0.011 0.011 7.320 7.320 dbcsr_new_transposed 2 7.0 0.139 0.139 6.992 6.992 dbcsr_redistribute 2 8.0 6.740 6.740 6.810 6.810 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.010 94.733 94.734 qs_energies 1 2.0 0.000 0.000 94.632 94.632 ls_scf 1 3.0 0.000 0.000 94.550 94.551 ls_scf_main 1 4.0 0.001 0.003 90.756 90.756 density_matrix_trs4 11 5.0 0.009 0.012 86.889 86.987 dbcsr_multiply_generic 185 6.1 0.078 0.089 81.585 81.785 multiply_cannon 185 7.1 0.042 0.045 68.043 69.046 multiply_cannon_loop 185 8.1 0.219 0.233 64.183 65.206 multiply_cannon_multrec 1480 9.1 42.446 44.314 42.930 44.773 mp_waitall_1 11936 10.3 19.170 21.453 19.170 21.453 multiply_cannon_metrocomm3 1480 9.1 0.019 0.020 11.267 14.631 make_m2s 370 7.1 0.034 0.037 9.212 9.337 make_images 370 8.1 0.703 0.723 9.093 9.222 multiply_cannon_metrocomm1 1480 9.1 0.011 0.012 4.594 7.169 calculate_norms 2960 9.1 5.097 5.306 5.097 5.306 make_images_data 370 9.1 0.012 0.014 3.704 4.025 arnoldi_extremal 12 6.1 0.000 0.001 3.876 3.886 arnoldi_normal_ev 12 7.1 0.002 0.008 3.875 3.885 build_subspace 23 8.1 0.039 0.053 3.747 3.751 mp_sum_l 1039 5.9 3.004 3.601 3.004 3.601 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.346 3.416 dbcsr_matrix_vector_mult 652 9.0 0.018 0.078 3.105 3.206 hybrid_alltoall_any 393 9.9 0.322 1.625 2.998 3.174 dbcsr_complete_redistribute 23 7.5 1.882 1.990 2.940 3.039 matrix_ls_to_qs 11 6.0 0.000 0.000 2.892 2.990 ls_scf_init_scf 1 4.0 0.000 0.000 2.928 2.929 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.890 2.901 matrix_decluster 11 7.0 0.000 0.000 2.635 2.731 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.642 2.645 make_images_pack 370 9.1 2.467 2.610 2.472 2.616 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 2.065 2.601 dbcsr_matrix_vector_mult_local 652 10.0 2.475 2.595 2.479 2.600 buffer_matrices_ensure_size 370 8.1 2.220 2.345 2.220 2.345 dbcsr_add_d 280 6.0 0.002 0.002 2.084 2.185 dbcsr_add_anytype 280 7.0 1.127 1.180 2.083 2.183 dbcsr_finalize 646 7.5 0.014 0.014 1.966 2.065 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=115.118, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=71.731, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=51.75, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=43.695, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=33.074, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=18.634, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=20.659000000000006, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=42.446, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.882, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.475, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.004, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=19.17, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.097, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.006 113.355 113.355 lib_test 1 2.0 0.000 0.000 113.348 113.348 dbcsr_run_tests 3 3.0 0.004 0.004 113.347 113.347 test_multiplies_multiproc 3 4.0 0.001 0.001 91.953 91.953 dbcsr_redistribute 9 5.0 61.018 61.018 64.667 64.667 dbcsr_multiply_generic 9 5.0 0.001 0.001 25.507 25.507 dbcsr_make_random_matrix 9 4.0 15.663 15.663 21.298 21.298 multiply_cannon 9 6.0 0.002 0.002 18.614 18.614 multiply_cannon_loop 9 7.0 0.006 0.006 18.074 18.074 multiply_cannon_multrec 9 8.0 18.067 18.067 18.068 18.068 dbcsr_finalize 27 5.7 0.005 0.005 9.585 9.585 dbcsr_merge_all 18 6.5 3.333 3.333 8.774 8.774 tree_to_linear_d 9 7.0 3.405 3.405 3.405 3.405 mp_alltoall_d11v 27 6.0 3.320 3.320 3.320 3.320 dbcsr_data_release 975 7.6 2.594 2.594 2.594 2.594 make_m2s 18 6.0 0.001 0.001 2.307 2.307 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.006 28.750 28.751 lib_test 1 2.0 0.000 0.000 28.716 28.739 dbcsr_run_tests 3 3.0 0.000 0.001 28.714 28.737 test_multiplies_multiproc 3 4.0 0.001 0.002 27.536 27.650 dbcsr_multiply_generic 9 5.0 0.002 0.002 25.394 25.500 multiply_cannon 9 6.0 0.003 0.003 22.960 23.476 multiply_cannon_loop 9 7.0 0.004 0.004 22.500 22.978 multiply_cannon_multrec 72 8.0 18.936 19.575 18.938 19.577 mp_waitall_1 576 9.2 3.981 4.843 3.981 4.843 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 3.174 3.894 mp_sum_l 310 2.7 0.578 1.448 0.578 1.448 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.573 1.444 dbcsr_make_random_matrix 9 4.0 0.903 1.008 1.129 1.249 make_m2s 18 6.0 0.001 0.001 0.991 1.050 make_images 18 7.0 0.027 0.029 0.988 1.047 dbcsr_finalize 27 5.7 0.001 0.001 0.912 1.033 dbcsr_merge_all 18 6.5 0.152 0.173 0.806 0.901 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.377 0.819 dbcsr_data_release 444 7.6 0.699 0.811 0.699 0.811 dbcsr_redistribute 9 5.0 0.404 0.475 0.711 0.752 dbcsr_destroy 111 5.9 0.004 0.058 0.600 0.697 make_images_data 18 8.0 0.001 0.001 0.483 0.589 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.275000000000006, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=61.018, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=18.067, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=15.663, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=3.405, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.333, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.594, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=3.0970000000000013, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.404, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=18.936, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.903, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.152, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.699, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.578, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.981, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.047 0.047 148.710 148.710 qs_mol_dyn_low 1 2.0 0.005 0.005 146.731 146.731 velocity_verlet 5 3.0 0.005 0.005 118.923 118.923 qmmm_el_coupling 6 3.8 0.000 0.000 73.799 73.799 qmmm_elec_with_gaussian 6 4.8 0.187 0.187 73.793 73.793 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 72.775 72.775 qmmm_elec_gaussian_low_G 6 6.8 71.187 71.187 71.187 71.187 qs_forces 6 3.8 0.001 0.001 59.098 59.098 qs_energies 6 4.8 0.000 0.000 52.578 52.578 scf_env_do_scf 6 5.8 0.001 0.001 48.560 48.560 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 40.591 40.591 rebuild_ks_matrix 45 8.4 0.000 0.000 40.290 40.290 qs_ks_build_kohn_sham_matrix 45 9.4 0.008 0.008 40.290 40.290 qs_ks_update_qs_env 45 7.8 0.000 0.000 34.526 34.526 pw_transfer 966 11.9 0.075 0.075 24.657 24.657 fft_wrap_pw1pw2 801 13.0 0.009 0.009 24.280 24.280 fft_wrap_pw1pw2_150 507 14.3 2.402 2.402 23.720 23.720 qs_vxc_create 45 10.4 0.001 0.001 21.864 21.864 xc_vxc_pw_create 45 11.4 4.226 4.226 21.863 21.863 qs_rho_update_rho 45 7.9 0.000 0.000 10.856 10.856 calculate_rho_elec 45 8.9 0.901 0.901 10.855 10.855 pw_scatter_s 429 15.4 10.702 10.702 10.702 10.702 xc_rho_set_and_dset_create 45 12.4 0.253 0.253 10.096 10.096 fft3d_s 802 15.0 9.644 9.644 9.654 9.654 qmmm_forces 6 3.8 0.002 0.002 8.359 8.359 init_scf_loop 6 6.8 0.000 0.000 7.963 7.963 qmmm_forces_with_gaussian 6 4.8 0.146 0.146 7.813 7.813 pw_integral_ab 2539 7.4 7.595 7.595 7.595 7.595 qs_ks_ddapc 45 10.4 0.001 0.001 6.788 6.788 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.687 6.687 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.776 5.776 qmmm_forces_gaussian_low_G 6 6.8 5.612 5.612 5.612 5.612 pw_poisson_solve 51 9.9 2.310 2.310 5.348 5.348 grid_collocate_task_list 45 9.9 5.048 5.048 5.048 5.048 density_rs2pw 45 9.9 0.003 0.003 4.906 4.906 fist_calc_energy_force 6 3.8 0.002 0.002 4.639 4.639 sum_up_and_integrate 45 10.4 0.244 0.244 4.495 4.495 integrate_v_rspace 45 11.4 0.013 0.013 4.251 4.251 cp_ddapc_apply_CD 45 11.4 0.006 0.006 4.174 4.174 force_nonbond 6 4.8 3.334 3.334 3.334 3.334 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.040 95.319 95.321 qs_mol_dyn_low 1 2.0 0.005 0.006 93.664 93.762 qs_forces 6 3.8 0.001 0.001 70.203 70.203 qs_energies 6 4.8 0.001 0.002 66.983 66.983 scf_env_do_scf 6 5.8 0.000 0.001 65.310 65.310 scf_env_do_scf_inner_loop 113 6.2 0.003 0.010 62.714 62.715 rebuild_ks_matrix 119 8.1 0.000 0.001 46.678 46.696 qs_ks_build_kohn_sham_matrix 119 9.1 0.024 0.027 46.678 46.695 qs_ks_update_qs_env 119 7.3 0.001 0.001 43.917 43.933 velocity_verlet 5 3.0 0.003 0.003 38.651 38.656 pw_transfer 2446 11.8 0.301 0.322 30.633 30.996 fft_wrap_pw1pw2 2059 12.8 0.038 0.040 29.721 30.124 fft_wrap_pw1pw2_150 1321 14.0 2.614 2.805 28.838 29.162 qs_vxc_create 119 10.1 0.004 0.005 24.103 24.111 xc_vxc_pw_create 119 11.1 0.506 0.712 24.099 24.106 fft3d_ps 2059 14.8 13.388 14.623 22.473 23.018 qs_rho_update_rho 119 7.3 0.001 0.001 18.240 18.241 calculate_rho_elec 119 8.3 0.086 0.096 18.239 18.240 sum_up_and_integrate 119 10.1 0.100 0.112 16.379 16.419 integrate_v_rspace 119 11.1 0.005 0.006 16.279 16.328 rs_pw_transfer 988 11.5 0.017 0.019 12.860 13.480 qmmm_forces 6 3.8 0.003 0.003 13.018 13.019 qmmm_forces_with_gaussian 6 4.8 0.450 0.542 12.575 12.793 density_rs2pw 119 9.3 0.012 0.013 11.360 11.887 xc_rho_set_and_dset_create 119 12.1 0.536 0.632 11.204 11.705 potential_pw2rs 119 12.1 0.012 0.013 10.068 10.087 qmmm_el_coupling 6 3.8 0.000 0.001 9.226 9.283 qmmm_elec_with_gaussian 6 4.8 0.401 0.564 9.222 9.279 mp_alltoall_z22v 2059 16.8 5.540 7.189 5.540 7.189 grid_collocate_task_list 119 9.3 6.528 6.948 6.528 6.948 grid_integrate_task_list 119 12.1 5.700 6.160 5.700 6.160 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.844 6.009 rs_pw_transfer_PW2RS_150 125 13.9 2.869 2.979 5.574 5.609 rs_pw_transfer_RS2PW_150 125 11.2 2.313 2.512 4.670 5.264 x_to_yz 1095 16.3 2.107 2.346 4.947 5.242 yz_to_x 964 15.3 1.384 1.570 4.083 5.124 pw_restrict_s3 18 5.8 2.555 2.615 4.930 5.026 mp_waitany 4028 12.8 4.001 4.991 4.001 4.991 qmmm_forces_gaussian_low_G 6 6.8 4.781 4.932 4.781 4.932 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.947 4.005 pw_prolongate_s3 18 6.8 2.036 2.075 3.947 4.005 qs_scf_new_mos 113 7.2 0.001 0.001 3.784 3.793 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.783 3.792 ot_scf_mini 113 9.2 0.002 0.002 3.618 3.626 dbcsr_multiply_generic 2588 12.3 0.100 0.116 3.389 3.570 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.391 3.508 qs_ks_ddapc 119 10.1 0.003 0.003 3.245 3.424 pw_integral_ab 2761 7.7 2.509 2.537 2.925 3.109 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.772 2.773 mp_sum_dm3 33 5.7 2.606 2.716 2.606 2.716 init_scf_loop 6 6.8 0.000 0.001 2.592 2.593 qmmm_elec_gaussian_low_G 6 6.8 2.437 2.538 2.437 2.538 pw_gather_p 964 14.3 2.310 2.458 2.310 2.458 mp_waitall_1 188862 16.2 2.240 2.420 2.240 2.420 pw_scatter_p 1095 15.3 2.215 2.323 2.215 2.323 ot_mini 113 10.2 0.001 0.001 2.281 2.293 pw_derive 732 12.5 1.923 2.056 1.923 2.056 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=38.92200000000001, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=71.187, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.702, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.644, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=7.595, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.612, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.048, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=54.436, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.437, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=2.509, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.781, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.528, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=13.388, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.54, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.7, yerr=0.0 Summary: Performance test took 63 minutes. Status: OK Removing intermediate container e7073e67ed57 ---> acbc963b757d Step 41/41 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 719c8b656659 Removing intermediate container 719c8b656659 ---> 82e504c64dd9 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 82e504c64dd9 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-b51:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-03-11 20:32:26+00:00