StartDate: 2022-02-27 13:37:55+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: b4b7b1df51f45917a366a6574541801c63b708bf CommitTime: 2022-02-27 14:21:21 +0100 CommitAuthor: Ole Schütt CommitSubject: Docker: Run performance tests with -bind-to socket Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=b4b7b1df51f45917a366a6574541801c63b708bf Sending build context to Docker daemon 362.2MB Step 1/41 : FROM ubuntu:20.04 20.04: Pulling from library/ubuntu 08c01a0ec47e: Already exists Digest: sha256:669e010b58baf5beb2836b253c1fd5768333f0d1dbcb834f7c07a4dc93f474be Status: Downloaded newer image for ubuntu:20.04 ---> 54c9d81cbb44 Step 2/41 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 3a843b424587 Step 3/41 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 9621b4b5709d Step 4/41 : RUN ./install_requirements.sh ubuntu:20.04 ---> Using cache ---> c1cdc0430e60 Step 5/41 : RUN mkdir scripts ---> Using cache ---> 3877592c52c6 Step 6/41 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 924d174bd2f7 Step 7/41 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 7fb97c32b287 Step 8/41 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --dry-run ---> Using cache ---> bbadc4f47f53 Step 9/41 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 08941936a9ff Step 10/41 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 73ae15fda692 Step 11/41 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> fbfb1d7faaef Step 12/41 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 91a9df073cd3 Step 13/41 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> d9b1bbaa6218 Step 14/41 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> fc7b7ac22079 Step 15/41 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 86e93a8718fd Step 16/41 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 6a93f89b7991 Step 17/41 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> b17daab24faf Step 18/41 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> ac1bef8e032a Step 19/41 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 5ce1104ca234 Step 20/41 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 7d9b14a4b5c3 Step 21/41 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> b5d4cd5cf81a Step 22/41 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 02533825d5a1 Step 23/41 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> a49032142fd6 Step 24/41 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> ea8efb993f5a Step 25/41 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> f77c94b68c97 Step 26/41 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> b06a1382c04f Step 27/41 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 142b1ca9fb2f Step 28/41 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 33d8da7a2091 Step 29/41 : WORKDIR /workspace/cp2k ---> Using cache ---> 03d7e5ec7430 Step 30/41 : COPY ./Makefile . ---> Using cache ---> e26cdb7f091c Step 31/41 : COPY ./src ./src ---> Using cache ---> b81f5317d42a Step 32/41 : COPY ./exts ./exts ---> Using cache ---> b761a77a542f Step 33/41 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> 1c5beb416a7d Step 34/41 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true )" ---> Running in e46f12b9a6f3 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container e46f12b9a6f3 ---> 752fdd0b32f2 Step 35/41 : COPY ./data ./data ---> 4023892f1ba5 Step 36/41 : COPY ./tests ./tests ---> d2b3e30a68e4 Step 37/41 : COPY ./tools/regtesting ./tools/regtesting ---> b06614d6812b Step 38/41 : COPY ./benchmarks ./benchmarks ---> 4ca973686de5 Step 39/41 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 44fcbc9ed31f Step 40/41 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in ff8881b3db7f ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 155.150 155.150 qs_mol_dyn_low 1 2.0 0.004 0.004 154.353 154.353 qs_forces 11 3.9 0.002 0.002 154.296 154.296 qs_energies 11 4.9 0.001 0.001 144.175 144.175 scf_env_do_scf 11 5.9 0.001 0.001 113.146 113.146 velocity_verlet 10 3.0 0.002 0.002 108.268 108.268 scf_env_do_scf_inner_loop 108 6.5 0.009 0.009 77.047 77.047 init_scf_loop 11 6.9 0.000 0.000 35.916 35.916 prepare_preconditioner 11 7.9 0.000 0.000 32.102 32.102 make_preconditioner 11 8.9 0.000 0.000 32.102 32.102 rebuild_ks_matrix 119 8.3 0.001 0.001 31.206 31.206 qs_ks_build_kohn_sham_matrix 119 9.3 0.018 0.018 31.205 31.205 make_full_inverse_cholesky 11 9.9 0.000 0.000 30.129 30.129 qs_ks_update_qs_env 119 7.6 0.001 0.001 29.182 29.182 qs_rho_update_rho 119 7.7 0.001 0.001 27.126 27.126 calculate_rho_elec 119 8.7 1.532 1.532 27.125 27.125 qs_scf_new_mos 108 7.5 0.001 0.001 26.643 26.643 qs_scf_loop_do_ot 108 8.5 0.001 0.001 26.642 26.642 ot_scf_mini 108 9.5 0.003 0.003 24.788 24.788 dbcsr_multiply_generic 2286 12.5 0.184 0.184 22.467 22.467 grid_collocate_task_list 119 9.7 21.323 21.323 21.323 21.323 sum_up_and_integrate 119 10.3 0.385 0.385 19.707 19.707 integrate_v_rspace 119 11.3 0.540 0.540 19.322 19.322 cp_fm_cholesky_invert 11 10.9 18.142 18.142 18.142 18.142 grid_integrate_task_list 119 12.3 16.490 16.490 16.490 16.490 init_scf_run 11 5.9 0.001 0.001 16.063 16.063 scf_env_initial_rho_setup 11 6.9 0.001 0.001 16.062 16.062 wfi_extrapolate 11 7.9 0.001 0.001 15.189 15.189 cp_gemm 81 9.0 0.000 0.000 14.751 14.751 cp_gemm_cosma 81 10.0 14.751 14.751 14.751 14.751 ot_mini 108 10.5 0.001 0.001 14.667 14.667 make_m2s 4572 13.5 0.065 0.065 12.435 12.435 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.128 10.128 qs_ot_get_derivative 108 11.5 0.001 0.001 7.543 7.543 pw_transfer 1439 11.6 0.089 0.089 7.142 7.142 ot_diis_step 108 11.5 0.006 0.006 7.120 7.120 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 6.865 6.865 make_images 4572 14.5 2.490 2.490 6.556 6.556 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.331 6.331 qs_ot_get_p 119 10.4 0.001 0.001 6.286 6.286 dbcsr_complete_redistribute 329 12.2 2.977 2.977 6.204 6.204 dbcsr_make_dense_low 5837 15.5 0.095 0.095 6.041 6.041 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.032 6.032 apply_single 119 13.6 0.001 0.001 6.031 6.031 make_dense_data 5837 16.5 5.316 5.316 5.923 5.923 cp_fm_cholesky_decompose 22 10.9 5.922 5.922 5.922 5.922 fft_wrap_pw1pw2_140 487 13.2 0.575 0.575 5.799 5.799 multiply_cannon 2286 13.5 0.968 0.968 5.686 5.686 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.671 5.671 dbcsr_make_images_dense 3978 14.8 0.027 0.027 5.436 5.436 qs_create_task_list 11 7.9 0.000 0.000 5.160 5.160 generate_qs_task_list 11 8.9 3.590 3.590 5.160 5.160 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.128 5.128 dbcsr_copy 2102 12.0 0.274 0.274 5.082 5.082 qs_ot_p2m_diag 50 11.0 0.207 0.207 4.768 4.768 dbcsr_copy_into_existing 22 7.9 4.764 4.764 4.764 4.764 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.679 4.679 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.679 4.679 pw_poisson_solve 119 10.3 1.809 1.809 4.383 4.383 density_rs2pw 119 9.7 0.007 0.007 4.270 4.270 cp_dbcsr_syevd 50 12.0 0.004 0.004 4.251 4.251 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.234 4.234 multiply_cannon_loop 2286 14.5 0.047 0.047 4.180 4.180 multiply_cannon_multrec 2286 15.5 4.067 4.067 4.131 4.131 cp_fm_diag_elpa 50 13.0 0.000 0.000 4.103 4.103 cp_fm_diag_elpa_base 50 14.0 4.046 4.046 4.102 4.102 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 3.884 3.884 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.786 3.786 copy_fm_to_dbcsr 176 11.2 0.002 0.002 3.218 3.218 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.016 66.131 66.132 qs_mol_dyn_low 1 2.0 0.006 0.007 65.989 65.995 qs_forces 11 3.9 0.002 0.002 65.935 65.935 qs_energies 11 4.9 0.001 0.001 61.182 61.184 scf_env_do_scf 11 5.9 0.001 0.001 54.992 54.992 scf_env_do_scf_inner_loop 108 6.5 0.003 0.011 51.013 51.013 velocity_verlet 10 3.0 0.002 0.002 39.600 39.601 rebuild_ks_matrix 119 8.3 0.001 0.001 25.784 25.828 qs_ks_build_kohn_sham_matrix 119 9.3 0.021 0.022 25.784 25.827 qs_ks_update_qs_env 119 7.6 0.001 0.001 22.784 22.827 qs_rho_update_rho 119 7.7 0.001 0.001 20.562 20.569 calculate_rho_elec 119 8.7 0.047 0.049 20.562 20.569 sum_up_and_integrate 119 10.3 0.040 0.042 20.478 20.500 integrate_v_rspace 119 11.3 0.004 0.005 20.437 20.460 grid_collocate_task_list 119 9.7 15.085 15.785 15.085 15.785 grid_integrate_task_list 119 12.3 14.990 15.462 14.990 15.462 dbcsr_multiply_generic 2286 12.5 0.129 0.133 14.510 14.587 qs_scf_new_mos 108 7.5 0.001 0.001 11.903 11.935 qs_scf_loop_do_ot 108 8.5 0.001 0.001 11.903 11.934 ot_scf_mini 108 9.5 0.003 0.003 11.146 11.169 multiply_cannon 2286 13.5 0.227 0.231 9.638 9.793 multiply_cannon_loop 2286 14.5 0.207 0.217 8.742 8.931 mp_waitall_1 169478 16.3 7.000 7.425 7.000 7.425 rs_pw_transfer 974 11.9 0.014 0.016 5.863 6.669 ot_mini 108 10.5 0.001 0.001 6.533 6.558 density_rs2pw 119 9.7 0.008 0.009 4.988 5.813 pw_transfer 1439 11.6 0.135 0.154 4.921 4.994 multiply_cannon_metrocomm3 18288 15.5 0.077 0.080 4.481 4.815 fft_wrap_pw1pw2 1201 12.6 0.013 0.015 4.657 4.729 potential_pw2rs 119 12.3 0.009 0.010 4.388 4.396 fft_wrap_pw1pw2_140 487 13.2 0.503 0.527 4.103 4.270 init_scf_run 11 5.9 0.000 0.001 4.244 4.244 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.243 4.244 init_scf_loop 11 6.9 0.000 0.001 3.964 3.964 wfi_extrapolate 11 7.9 0.001 0.001 3.879 3.879 fft3d_ps 1201 14.6 1.912 2.010 3.393 3.461 make_m2s 4572 13.5 0.074 0.075 3.356 3.406 multiply_cannon_multrec 18288 15.5 3.204 3.367 3.220 3.383 qs_ot_get_derivative 108 11.5 0.001 0.001 3.310 3.335 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.197 3.203 ot_diis_step 108 11.5 0.005 0.005 3.199 3.199 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.145 3.183 apply_single 119 13.6 0.001 0.001 3.145 3.183 mp_waitany 9880 13.7 1.978 2.772 1.978 2.772 make_images 4572 14.5 0.187 0.190 2.697 2.759 rs_pw_transfer_RS2PW_140 130 11.5 0.477 0.503 1.822 2.636 rs_pw_transfer_PW2RS_140 130 13.9 1.077 1.146 2.239 2.281 mp_alltoall_d11v 2130 13.8 1.299 1.829 1.299 1.829 qs_ot_get_p 119 10.4 0.001 0.001 1.633 1.662 cp_gemm 81 9.0 0.000 0.000 1.580 1.586 cp_gemm_cosma 81 10.0 1.580 1.586 1.580 1.586 rs_gather_matrices 119 12.3 0.117 0.129 1.012 1.564 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.410 1.515 qs_energies_init_hamiltonians 11 5.9 0.000 0.001 1.370 1.370 make_images_data 4572 15.5 0.062 0.070 1.243 1.348 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=74.45500000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.323, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=18.142, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=16.49, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=14.751, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=5.922, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.067, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=22.293999999999997, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.085, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.99, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.58, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.204, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.978, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 208.680 208.680 qs_mol_dyn_low 1 2.0 0.004 0.004 207.881 207.881 qs_forces 11 3.9 0.002 0.002 207.825 207.825 qs_energies 11 4.9 0.001 0.001 194.077 194.077 scf_env_do_scf 11 5.9 0.001 0.001 158.901 158.901 velocity_verlet 10 3.0 0.002 0.002 141.027 141.027 scf_env_do_scf_inner_loop 96 6.5 0.008 0.008 119.019 119.019 rebuild_ks_matrix 107 8.3 0.001 0.001 60.783 60.783 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.017 60.782 60.782 qs_ks_update_qs_env 107 7.6 0.001 0.001 54.746 54.746 qs_rho_update_rho 107 7.7 0.001 0.001 52.909 52.909 calculate_rho_elec 107 8.7 1.369 1.369 52.908 52.908 sum_up_and_integrate 107 10.3 0.343 0.343 50.333 50.333 integrate_v_rspace 107 11.3 0.538 0.538 49.990 49.990 grid_collocate_task_list 107 9.7 47.697 47.697 47.697 47.697 grid_integrate_task_list 107 12.3 47.337 47.337 47.337 47.337 init_scf_loop 11 6.9 0.000 0.000 39.686 39.686 prepare_preconditioner 11 7.9 0.000 0.000 32.522 32.522 make_preconditioner 11 8.9 0.000 0.000 32.522 32.522 make_full_inverse_cholesky 11 9.9 0.000 0.000 30.493 30.493 qs_scf_new_mos 96 7.5 0.001 0.001 23.408 23.408 qs_scf_loop_do_ot 96 8.5 0.001 0.001 23.407 23.407 ot_scf_mini 96 9.5 0.003 0.003 21.707 21.707 dbcsr_multiply_generic 1966 12.4 0.162 0.162 20.074 20.074 init_scf_run 11 5.9 0.001 0.001 18.942 18.942 scf_env_initial_rho_setup 11 6.9 0.001 0.001 18.942 18.942 cp_fm_cholesky_invert 11 10.9 18.023 18.023 18.023 18.023 wfi_extrapolate 11 7.9 0.001 0.001 17.871 17.871 cp_gemm 81 9.0 0.000 0.000 14.728 14.728 cp_gemm_cosma 81 10.0 14.727 14.727 14.727 14.727 ot_mini 96 10.5 0.001 0.001 12.877 12.877 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.309 11.309 make_m2s 3932 13.4 0.056 0.056 11.060 11.060 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.642 7.642 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.811 6.811 qs_ot_get_derivative 96 11.5 0.001 0.001 6.754 6.754 pw_transfer 1295 11.6 0.078 0.078 6.531 6.531 dbcsr_complete_redistribute 317 12.2 2.990 2.990 6.354 6.354 qs_create_task_list 11 7.9 0.000 0.000 6.300 6.300 generate_qs_task_list 11 8.9 4.708 4.708 6.300 6.300 fft_wrap_pw1pw2 1081 12.6 0.009 0.009 6.282 6.282 cp_fm_cholesky_decompose 22 10.9 6.258 6.258 6.258 6.258 ot_diis_step 96 11.5 0.005 0.005 6.120 6.120 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.103 6.103 make_images 3932 14.4 2.217 2.217 5.877 5.877 qs_ot_get_p 107 10.4 0.001 0.001 5.476 5.476 dbcsr_make_dense_low 4961 15.5 0.080 0.080 5.333 5.333 fft_wrap_pw1pw2_140 439 13.2 0.573 0.573 5.327 5.327 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.320 5.320 apply_single 107 13.6 0.000 0.000 5.319 5.319 make_dense_data 4961 16.5 4.731 4.731 5.234 5.234 copy_dbcsr_to_fm 147 11.2 0.004 0.004 5.205 5.205 multiply_cannon 1966 13.4 0.914 0.914 5.155 5.155 dbcsr_copy 1855 11.9 0.254 0.254 4.932 4.932 dbcsr_make_images_dense 3386 14.7 0.023 0.023 4.795 4.795 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.766 4.766 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.765 4.765 dbcsr_copy_into_existing 22 7.9 4.639 4.639 4.640 4.640 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.334 4.334 qs_ot_p2m_diag 44 11.0 0.189 0.189 4.189 4.189 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.011 121.495 121.496 qs_mol_dyn_low 1 2.0 0.004 0.006 121.377 121.383 qs_forces 11 3.9 0.002 0.002 121.325 121.325 qs_energies 11 4.9 0.001 0.002 112.903 112.905 scf_env_do_scf 11 5.9 0.001 0.001 103.828 103.829 scf_env_do_scf_inner_loop 96 6.5 0.003 0.010 96.565 96.566 velocity_verlet 10 3.0 0.002 0.002 72.217 72.218 rebuild_ks_matrix 107 8.3 0.001 0.001 56.207 56.249 qs_ks_build_kohn_sham_matrix 107 9.3 0.019 0.021 56.207 56.249 sum_up_and_integrate 107 10.3 0.036 0.039 51.355 51.376 integrate_v_rspace 107 11.3 0.004 0.005 51.319 51.342 qs_ks_update_qs_env 107 7.6 0.001 0.001 49.540 49.574 qs_rho_update_rho 107 7.7 0.001 0.001 46.904 46.907 calculate_rho_elec 107 8.7 0.042 0.044 46.903 46.906 grid_integrate_task_list 107 12.3 45.308 46.197 45.308 46.197 grid_collocate_task_list 107 9.7 41.168 42.188 41.168 42.188 dbcsr_multiply_generic 1966 12.4 0.114 0.115 13.050 13.136 qs_scf_new_mos 96 7.5 0.001 0.001 10.540 10.592 qs_scf_loop_do_ot 96 8.5 0.001 0.001 10.539 10.591 ot_scf_mini 96 9.5 0.003 0.003 9.869 9.919 multiply_cannon 1966 13.4 0.195 0.198 8.709 8.987 multiply_cannon_loop 1966 14.4 0.183 0.191 7.923 8.126 rs_pw_transfer 878 11.9 0.013 0.014 6.219 7.340 init_scf_loop 11 6.9 0.000 0.000 7.248 7.249 init_scf_run 11 5.9 0.000 0.001 7.094 7.094 scf_env_initial_rho_setup 11 6.9 0.000 0.001 7.094 7.094 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.850 6.857 mp_waitall_1 146670 16.2 6.371 6.707 6.371 6.707 wfi_extrapolate 11 7.9 0.001 0.001 6.508 6.508 density_rs2pw 107 9.7 0.008 0.008 5.283 6.403 ot_mini 96 10.5 0.001 0.001 5.767 5.822 pw_transfer 1295 11.6 0.122 0.137 4.487 4.561 multiply_cannon_metrocomm3 15728 15.4 0.067 0.070 4.076 4.418 fft_wrap_pw1pw2 1081 12.6 0.012 0.013 4.249 4.323 potential_pw2rs 107 12.3 0.008 0.010 4.098 4.107 fft_wrap_pw1pw2_140 439 13.2 0.454 0.482 3.741 3.872 mp_waitany 8968 13.7 2.645 3.834 2.645 3.834 rs_pw_transfer_RS2PW_140 118 11.5 0.389 0.413 2.409 3.529 mp_alltoall_d11v 1998 13.7 2.130 3.493 2.130 3.493 rs_gather_matrices 107 12.3 0.109 0.120 1.865 3.223 fft3d_ps 1081 14.6 1.757 1.893 3.093 3.166 multiply_cannon_multrec 15728 15.4 2.909 3.055 2.923 3.070 make_m2s 3932 13.4 0.064 0.065 2.991 3.033 qs_ot_get_derivative 96 11.5 0.001 0.001 2.884 2.933 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.841 2.881 apply_single 107 13.6 0.001 0.001 2.841 2.881 ot_diis_step 96 11.5 0.004 0.005 2.863 2.863 make_images 3932 14.4 0.165 0.167 2.415 2.461 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=74.638, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=47.697, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=47.337, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=18.023, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=14.727, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=6.258, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=23.09400000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=41.168, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=45.308, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=6.371, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.645, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.909, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.373 0.373 273.581 273.581 qs_energies 1 2.0 0.000 0.000 272.301 272.301 scf_env_do_scf 1 3.0 0.000 0.000 269.997 269.997 qs_ks_update_qs_env 8 5.0 0.000 0.000 252.756 252.756 rebuild_ks_matrix 7 6.0 0.000 0.000 252.651 252.651 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 252.651 252.651 hfx_ks_matrix 7 8.0 0.000 0.000 170.285 170.285 integrate_four_center 7 9.0 1.899 1.899 170.255 170.255 integrate_four_center_main 7 10.0 0.492 0.492 157.411 157.411 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 157.091 157.091 integrate_four_center_bin 457 11.0 156.919 156.919 156.919 156.919 init_scf_loop 1 4.0 0.000 0.000 112.893 112.893 cp_gemm 129 10.3 0.000 0.000 67.925 67.925 cp_gemm_cosma 129 11.3 67.924 67.924 67.924 67.924 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 38.722 38.722 admm_fit_mo_coeffs 7 9.0 0.000 0.000 37.084 37.084 admm_mo_merge_derivs 7 8.0 0.000 0.000 35.092 35.092 merge_mo_derivs_diag 7 9.0 0.021 0.021 35.092 35.092 purify_mo_diag 7 10.0 0.001 0.001 22.322 22.322 fit_mo_coeffs 7 10.0 0.000 0.000 14.763 14.763 prepare_preconditioner 1 5.0 0.000 0.000 13.601 13.601 make_preconditioner 1 6.0 0.000 0.000 13.601 13.601 integrate_four_center_load 7 10.0 0.000 0.000 10.567 10.567 hfx_load_balance 1 11.0 0.002 0.002 10.567 10.567 arnoldi_normal_ev 11 9.3 0.002 0.002 8.368 8.368 estimate_cond_num 1 7.0 0.000 0.000 8.298 8.298 build_subspace 28 9.5 0.014 0.014 8.213 8.213 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.198 0.204 188.249 188.250 qs_energies 1 2.0 0.000 0.000 187.913 187.914 scf_env_do_scf 1 3.0 0.000 0.000 187.387 187.387 qs_ks_update_qs_env 8 5.0 0.000 0.000 184.684 184.684 rebuild_ks_matrix 7 6.0 0.000 0.000 184.672 184.672 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 184.672 184.672 hfx_ks_matrix 7 8.0 0.000 0.001 173.207 173.208 integrate_four_center 7 9.0 0.094 0.397 173.192 173.192 integrate_four_center_main 7 10.0 0.004 0.005 154.704 161.674 integrate_four_center_bin 448 11.0 154.700 161.670 154.700 161.670 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 109.807 109.807 init_scf_loop 1 4.0 0.000 0.000 77.579 77.579 integrate_four_center_load 7 10.0 0.000 0.000 10.706 10.721 hfx_load_balance 1 11.0 0.001 0.001 10.706 10.721 mp_sync 70 11.3 6.981 10.095 6.981 10.095 hfx_load_balance_count 1 12.0 5.168 5.351 5.168 5.351 hfx_load_balance_bin 1 12.0 5.167 5.351 5.167 5.351 cp_gemm 129 10.3 0.000 0.001 4.807 4.812 cp_gemm_cosma 129 11.3 4.807 4.812 4.807 4.812 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=45.97400000000002, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=156.919, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=67.924, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.899, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.492, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.373, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=11.130000000000024, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=154.7, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=4.807, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.094, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.198, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.167, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=6.981, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.168, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 362.779 362.779 qs_energies 1 2.0 0.000 0.000 362.291 362.291 mp2_main 1 3.0 0.000 0.000 356.264 356.264 mp2_gpw_main 1 4.0 0.000 0.000 355.892 355.892 rpa_ri_compute_en 1 5.0 0.000 0.000 343.576 343.576 rpa_num_int 1 6.0 0.001 0.001 343.554 343.554 compute_mat_P_omega 1 7.0 0.002 0.002 169.750 169.750 compute_mat_P_omega_contract 10 8.0 11.542 11.542 168.200 168.200 dbt_total 2336 9.6 0.017 0.017 161.825 161.825 cp_gemm 105 8.4 0.000 0.000 145.692 145.692 cp_gemm_cosma 105 9.4 145.692 145.692 145.692 145.692 GW_matrix_operations 10 7.0 0.005 0.005 102.952 102.952 dbt_contract 787 11.0 47.097 47.097 99.889 99.889 compute_mat_P_omega_calc_M_occ 250 9.0 11.499 11.499 60.838 60.838 dbt_copy 1103 10.7 20.285 20.285 60.695 60.695 dbt_tas_total 1149 12.2 0.232 0.232 49.736 49.736 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 49.375 49.375 dbt_tas_multiply 807 12.1 0.004 0.004 48.520 48.520 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 47.663 47.663 dbt_tas_dbm 807 14.1 0.005 0.005 39.181 39.181 dbm_multiply 807 16.1 39.168 39.168 39.168 39.168 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 37.741 37.741 dbt_tas_mm_1N 524 15.1 0.002 0.002 24.023 24.023 compute_QP_energies 1 7.0 0.000 0.000 19.575 19.575 compute_self_energy_cubic_gw 1 8.0 0.092 0.092 19.574 19.574 dbt_tas_copy 574 11.4 15.960 15.960 19.114 19.114 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 17.768 17.768 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 14.455 14.455 dbt_tas_mm_2 251 15.0 0.002 0.002 13.450 13.450 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 12.465 12.465 dbt_tas_reserve_blocks_index 3261 13.7 7.508 7.508 12.453 12.453 dbt_copy_nocomm 251 12.0 10.861 10.861 12.413 12.413 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 12.301 12.301 dbt_reserve_blocks_index 2280 12.5 1.611 1.611 10.571 10.571 dbt_reserve_blocks_index_array 2222 11.6 0.011 0.011 10.535 10.535 cp_fm_cholesky_invert 10 8.0 8.849 8.849 8.849 8.849 contract_cubic_gw 21 9.0 0.000 0.000 8.451 8.451 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.007 50.571 50.572 qs_energies 1 2.0 0.001 0.001 50.465 50.466 mp2_main 1 3.0 0.000 0.000 49.106 49.108 mp2_gpw_main 1 4.0 0.000 0.001 49.054 49.055 rpa_ri_compute_en 1 5.0 0.000 0.000 47.110 47.111 rpa_num_int 1 6.0 0.000 0.001 47.102 47.103 dbt_total 2336 9.6 0.016 0.017 34.837 34.838 compute_mat_P_omega 1 7.0 0.001 0.002 33.651 33.660 compute_mat_P_omega_contract 10 8.0 0.632 0.664 33.357 33.361 dbt_contract 787 11.0 1.722 1.899 25.482 25.487 dbt_tas_total 1149 12.2 0.070 0.075 22.148 22.148 dbt_tas_multiply 807 12.1 0.003 0.003 22.065 22.068 dbt_tas_dbm 807 14.1 0.004 0.005 15.238 15.239 dbm_multiply 807 16.1 12.238 13.018 12.238 13.018 compute_mat_P_omega_calc_M_occ 250 9.0 0.626 0.659 11.168 11.168 cp_gemm 105 8.4 0.000 0.000 9.421 9.447 cp_gemm_cosma 105 9.4 9.421 9.446 9.421 9.446 dbt_copy 1111 10.7 3.616 3.822 8.016 8.385 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.187 8.187 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 7.002 7.002 dbt_tas_mm_1N 524 15.1 0.002 0.002 6.173 6.730 dbt_tas_mm_2 251 15.0 0.002 0.002 6.422 6.423 mp_sync 8706 11.6 5.300 6.310 5.300 6.310 GW_matrix_operations 10 7.0 0.001 0.002 6.047 6.054 compute_QP_energies 1 7.0 0.000 0.000 3.793 3.793 compute_self_energy_cubic_gw 1 8.0 0.005 0.005 3.791 3.793 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.265 3.265 dbt_communicate_buffer 1098 11.7 0.090 0.099 3.107 3.226 mp_waitall_2 3776 14.7 2.957 3.178 2.957 3.178 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 3.156 3.159 contract_cubic_gw 21 9.0 0.000 0.000 2.873 2.873 dbt_reserve_blocks_index 2849 12.4 0.100 0.107 2.179 2.502 dbt_reserve_blocks_index_array 2791 11.4 0.010 0.012 2.177 2.501 dbt_tas_reserve_blocks_index 3300 13.8 0.254 0.274 2.135 2.457 dbm_reserve_blocks 3696 14.8 1.949 2.263 1.949 2.263 dbt_tas_replicate 396 14.1 1.180 1.412 2.199 2.229 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 1.940 1.942 convert_to_new_pgrid 2421 14.1 0.032 0.035 1.457 1.525 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.002 1.507 1.512 dbm_copy 1608 15.1 1.417 1.486 1.417 1.486 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.346 1.350 scf_env_do_scf 1 3.0 0.000 0.000 1.308 1.308 scf_env_do_scf_inner_loop 17 4.0 0.001 0.001 1.307 1.308 mp_max_i 1992 9.8 0.884 1.154 0.884 1.154 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=94.577, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=145.692, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_contract", label="dbt_contract", y=47.097, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=39.168, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_copy", label="dbt_copy", y=20.285, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=15.96, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=15.317, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=9.421, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_contract", label="dbt_contract", y=1.722, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=12.238, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_copy", label="dbt_copy", y=3.616, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.3, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.957, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.019 0.019 695.595 695.595 qs_forces 1 2.0 0.000 0.000 694.826 694.826 rebuild_ks_matrix 7 6.6 0.000 0.000 685.675 685.675 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 685.675 685.675 hfx_ks_matrix 7 8.6 0.000 0.000 682.851 682.851 dbt_total 1051 10.5 0.010 0.010 563.132 563.132 hfx_ri_update_ks 7 9.6 0.000 0.000 379.221 379.221 hfx_ri_update_ks_Pmat 7 10.6 61.109 61.109 379.215 379.215 dbt_contract 283 11.7 116.972 116.972 359.263 359.263 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 351.926 351.926 qs_energies 1 3.0 0.000 0.000 342.833 342.833 scf_env_do_scf 1 4.0 0.000 0.000 342.440 342.440 qs_ks_update_qs_env 8 6.0 0.000 0.000 333.755 333.755 hfx_ri_update_forces 1 7.0 0.023 0.023 303.625 303.625 dbt_tas_total 533 12.7 3.101 3.101 230.155 230.155 dbt_tas_multiply 292 12.8 0.001 0.001 225.581 225.581 dbt_tas_dbm 292 14.8 0.002 0.002 203.217 203.217 dbm_multiply 292 16.8 203.212 203.212 203.212 203.212 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 201.343 201.343 dbt_copy 444 11.5 97.468 97.468 199.281 199.281 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 173.700 173.700 init_scf_loop 2 5.0 0.000 0.000 141.094 141.094 dbt_tas_mm_2 119 16.0 0.001 0.001 120.177 120.177 hfx_ri_forces_Pmat_2c_inv_2 9 8.0 0.000 0.000 55.760 55.760 hfx_ri_forces_Pmat_metric 9 8.0 0.001 0.001 54.091 54.091 precalc_derivatives 1 8.0 0.007 0.007 52.633 52.633 dbt_tas_mm_3N 94 14.7 0.000 0.000 51.976 51.976 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 43.522 43.522 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 42.881 42.881 dbt_tas_reserve_blocks_index 1569 14.6 15.949 15.949 36.635 36.635 dbt_tas_copy 287 12.3 27.761 27.761 35.158 35.158 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 33.150 33.150 dbt_reserve_blocks_index 1020 13.6 3.475 3.475 32.290 32.290 dbt_reserve_blocks_index_array 999 12.6 0.008 0.008 31.996 31.996 dbt_tas_mm_3T 77 17.1 0.000 0.000 30.902 30.902 hfx_ri_forces_Pmat_PQ_der 9 8.0 1.386 1.386 29.974 29.974 hfx_ri_forces_Pmat_Pmat_2 9 8.0 0.000 0.000 27.663 27.663 hfx_ri_forces_Pmat_2c_inv_1 1 8.0 4.230 4.230 26.840 26.840 build_3c_derivatives 2 9.0 1.127 1.127 26.708 26.708 get_force_from_3c_trace 18 8.0 24.714 24.714 24.714 24.714 dbt_communicate_buffer 157 12.8 20.398 20.398 20.398 20.398 dbt_split_blocks_generic 138 11.7 16.581 16.581 20.269 20.269 dbt_split_copyback 69 11.7 15.859 15.859 18.115 18.115 dbm_reserve_blocks 1813 15.4 17.096 17.096 17.096 17.096 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.008 97.212 97.213 qs_forces 1 2.0 0.000 0.000 97.053 97.053 rebuild_ks_matrix 7 6.6 0.000 0.000 96.046 96.047 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.003 96.046 96.047 hfx_ks_matrix 7 8.6 0.001 0.001 94.546 94.546 dbt_total 1051 10.5 0.009 0.010 86.525 86.525 dbt_contract 283 11.7 4.707 5.047 68.171 68.188 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 64.024 64.024 dbt_tas_total 533 12.7 0.115 0.228 62.479 62.479 dbt_tas_multiply 292 12.8 0.001 0.002 59.683 59.683 hfx_ri_update_forces 1 7.0 0.003 0.003 59.506 59.506 dbt_tas_dbm 292 14.8 0.002 0.002 46.045 46.045 dbm_multiply 292 16.8 41.051 43.138 41.051 43.138 hfx_ri_update_ks 7 9.6 0.000 0.000 35.039 35.039 hfx_ri_update_ks_Pmat 7 10.6 2.443 2.629 35.038 35.038 qs_energies 1 3.0 0.000 0.000 33.013 33.013 scf_env_do_scf 1 4.0 0.000 0.000 32.817 32.817 qs_ks_update_qs_env 8 6.0 0.000 0.000 32.023 32.024 dbt_tas_mm_2 119 16.0 0.001 0.001 20.500 20.500 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 18.600 18.600 hfx_ri_forces_Pmat_metric 9 8.0 0.001 0.001 17.674 17.674 dbt_copy 464 11.6 5.309 5.470 14.598 14.897 init_scf_loop 2 5.0 0.000 0.000 14.216 14.216 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 13.719 13.719 mp_sync 3797 12.3 8.653 12.279 8.653 12.279 dbt_tas_mm_3N 94 14.7 0.001 0.001 11.496 12.173 dbt_tas_mm_3T 77 17.1 0.000 0.001 11.217 12.057 hfx_ri_forces_Pmat_2c_inv_2 9 8.0 0.000 0.000 10.951 10.951 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 9.098 9.098 hfx_ri_forces_Pmat_PQ_der 9 8.0 0.063 0.065 8.784 8.784 hfx_ri_forces_Pmat_Pmat_2 9 8.0 0.000 0.000 7.134 7.152 dbt_tas_reserve_blocks_index 1602 14.7 0.557 0.588 5.880 6.127 mp_waitall_2 1484 15.5 5.648 6.061 5.648 6.061 dbm_reserve_blocks 1848 15.6 5.626 6.025 5.626 6.025 precalc_derivatives 1 8.0 0.002 0.003 5.528 5.528 dbt_reserve_blocks_index 1191 13.5 0.158 0.167 5.053 5.358 dbt_reserve_blocks_index_array 1170 12.5 0.007 0.008 5.017 5.325 dbt_tas_replicate 246 14.5 1.440 2.145 4.801 5.279 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 4.234 4.234 hfx_ri_forces_Pmat_2c_inv_1 1 8.0 0.158 0.166 4.187 4.190 convert_to_new_pgrid 876 14.8 0.028 0.037 3.442 3.995 dbm_copy 578 15.8 3.385 3.938 3.385 3.938 dbt_tas_communicate_buffer 498 15.8 0.020 0.024 3.132 3.471 build_3c_derivatives 2 9.0 0.597 0.645 3.092 3.104 dbt_communicate_buffer 328 12.3 0.021 0.022 2.852 3.011 dbt_tas_replicate_communicate_ 127 15.0 0.003 0.004 2.511 2.868 hfx_ri_forces_Pmat_Pmat_1 3 8.0 0.000 0.000 2.360 2.360 dbt_tas_copy 141 13.3 1.209 1.309 2.120 2.280 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 2.216 2.227 mp_sum_l 8004 13.7 1.535 2.219 1.535 2.219 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 2.023 2.023 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=171.9770000000001, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=203.212, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_contract", label="dbt_contract", y=116.972, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_copy", label="dbt_copy", y=97.468, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=61.109, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=27.761, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=17.096, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=22.566000000000003, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=41.051, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_contract", label="dbt_contract", y=4.707, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_copy", label="dbt_copy", y=5.309, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=2.443, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=1.209, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=5.626, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=8.653, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=5.648, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.012 148.334 148.334 qs_energies 1 2.0 0.000 0.000 148.192 148.192 mp2_main 1 3.0 0.000 0.000 90.338 90.338 mp2_gpw_main 1 4.0 0.005 0.005 84.596 84.596 mp2_ri_gpw_compute_in 1 5.0 0.113 0.113 81.289 81.289 eigensolver 11 5.8 0.001 0.001 61.602 61.602 cp_fm_syevd 11 6.8 0.000 0.000 61.458 61.458 cp_fm_syevd_base 11 7.8 61.457 61.457 61.457 61.457 mp2_ri_gpw_compute_in_loop 1 6.0 0.012 0.012 59.056 59.056 scf_env_do_scf 1 3.0 0.000 0.000 57.563 57.563 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 57.563 57.563 qs_scf_new_mos 10 5.0 0.000 0.000 56.545 56.545 mp2_eri_3c_integrate_gpw 1328 7.0 0.015 0.015 50.110 50.110 integrate_v_rspace 1338 8.0 0.545 0.545 33.244 33.244 grid_integrate_task_list 1338 9.0 29.763 29.763 29.763 29.763 calculate_wavefunction 2656 9.0 20.493 20.493 29.591 29.591 get_2c_integrals 1 6.0 0.000 0.000 22.098 22.098 compute_2c_integrals 1 7.0 0.003 0.003 21.415 21.415 compute_2c_integrals_loop_lm 1 8.0 0.011 0.011 21.404 21.404 mp2_eri_2c_integrate_gpw 1 9.0 1.173 1.173 21.392 21.392 pw_transfer 32000 10.6 1.059 1.059 15.837 15.837 fft_wrap_pw1pw2 26668 11.4 0.118 0.118 14.570 14.570 fft_wrap_pw1pw2_80 10626 12.4 0.412 0.412 11.217 11.217 fft3d_s 26669 13.4 9.806 9.806 9.856 9.856 dbcsr_multiply_generic 2666 8.0 0.157 0.157 6.780 6.780 ao_to_mo_and_store_B_mult_1 1328 7.0 0.010 0.010 6.750 6.750 potential_pw2rs 2666 10.0 0.231 0.231 5.812 5.812 mp2_eri_2c_integrate_gpw_pot_l 1328 10.0 0.006 0.006 5.125 5.125 calc_potential_gpw 2656 9.5 0.016 0.016 4.727 4.727 mp2_ri_gpw_compute_en 1 5.0 0.010 0.010 3.282 3.282 multiply_cannon 2666 9.0 0.842 0.842 3.136 3.136 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.008 34.880 34.881 qs_energies 1 2.0 0.000 0.001 34.782 34.782 mp2_main 1 3.0 0.000 0.001 33.357 33.357 mp2_gpw_main 1 4.0 0.001 0.001 33.304 33.304 mp2_ri_gpw_compute_in 1 5.0 0.004 0.004 31.150 32.146 mp2_ri_gpw_compute_in_loop 1 6.0 0.000 0.001 28.602 29.598 mp2_eri_3c_integrate_gpw 42 7.0 0.001 0.001 27.964 28.952 integrate_v_rspace 52 8.2 0.023 0.025 27.148 28.105 grid_integrate_task_list 52 9.2 26.748 27.727 26.748 27.727 mp2_ri_gpw_compute_en 1 5.0 0.037 0.038 2.125 3.030 get_2c_integrals 1 6.0 0.000 0.000 2.520 2.525 compute_2c_integrals 1 7.0 0.001 0.001 2.475 2.479 compute_2c_integrals_loop_lm 1 8.0 0.000 0.001 2.342 2.455 mp2_eri_2c_integrate_gpw 1 9.0 0.318 0.355 2.342 2.455 pw_transfer 1136 10.5 0.050 0.052 2.282 2.329 fft_wrap_pw1pw2 948 11.3 0.005 0.006 2.172 2.221 fft_wrap_pw1pw2_80 338 12.4 0.075 0.080 1.914 1.959 calculate_wavefunction 84 9.0 0.645 0.709 1.856 1.943 mp2_ri_get_sizes 1 6.0 0.000 0.000 0.996 1.902 mp_min_d 1 7.0 0.996 1.902 0.996 1.902 fft3d_s 843 13.4 1.514 1.560 1.547 1.593 scf_env_do_scf 1 3.0 0.000 0.000 1.322 1.323 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 1.322 1.323 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.113 0.158 0.817 0.856 prepare_gpw 2 8.5 0.000 0.001 0.763 0.787 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=25.64200000000001, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=61.457, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=29.763, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=20.493, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.806, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_eri_2c_integrate_gpw", label="mp2_eri_2c_integrate_gpw", y=1.173, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_min_d", label="mp_min_d", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=4.6590000000000025, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="cp_fm_syevd_base", label="cp_fm_syevd_base", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=26.748, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.645, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="fft3d_s", label="fft3d_s", y=1.514, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_eri_2c_integrate_gpw", label="mp2_eri_2c_integrate_gpw", y=0.318, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_min_d", label="mp_min_d", y=0.996, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.099 0.099 187.230 187.230 qs_energies 1 2.0 0.000 0.000 185.469 185.469 scf_env_do_scf 1 3.0 0.000 0.000 175.830 175.830 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 175.830 175.830 qs_scf_new_mos 15 5.0 0.000 0.000 77.144 77.144 qs_ks_update_qs_env 15 5.0 0.000 0.000 68.465 68.465 rebuild_ks_matrix 15 6.0 0.000 0.000 68.102 68.102 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 68.102 68.102 eigensolver 15 6.0 0.002 0.002 63.795 63.795 cp_fm_diag_elpa 15 7.0 0.000 0.000 50.358 50.358 cp_fm_diag_elpa_base 15 8.0 45.866 45.866 50.358 50.358 qs_vxc_create 15 8.0 0.036 0.036 43.837 43.837 calculate_dispersion_nonloc 15 9.0 8.839 8.839 38.037 38.037 pw_transfer 1191 9.8 0.090 0.090 25.888 25.888 fft_wrap_pw1pw2 1086 10.9 0.012 0.012 25.613 25.613 qs_rho_update_rho 16 5.0 0.000 0.000 24.091 24.091 calculate_rho_elec 16 6.0 0.338 0.338 24.091 24.091 sum_up_and_integrate 15 8.0 0.075 0.075 22.724 22.724 integrate_v_rspace 15 9.0 0.034 0.034 22.649 22.649 grid_collocate_task_list 16 7.0 22.564 22.564 22.564 22.564 grid_integrate_task_list 15 10.0 22.050 22.050 22.050 22.050 fft_wrap_pw1pw2_150 765 12.0 3.303 3.303 19.364 19.364 copy_dbcsr_to_fm 16 5.9 0.001 0.001 10.714 10.714 fft3d_s 1087 12.8 10.556 10.556 10.568 10.568 pw_scatter_s 585 13.0 10.203 10.203 10.203 10.203 dbcsr_complete_redistribute 46 8.3 3.429 3.429 9.439 9.439 cp_fm_cholesky_restore 45 7.0 8.985 8.985 8.985 8.985 cp_fm_upper_to_full 30 8.0 8.941 8.941 8.941 8.941 vdW_energy 15 10.0 7.561 7.561 7.561 7.561 gspace_mixing 14 5.0 0.271 0.271 7.417 7.417 broyden_mixing 14 6.0 6.678 6.678 6.678 6.678 fft_wrap_pw1pw2_200 197 11.5 0.322 0.322 6.004 6.004 xc_vxc_pw_create 15 9.0 1.534 1.534 5.764 5.764 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.584 4.584 init_scf_run 1 3.0 0.000 0.000 4.325 4.325 dbcsr_finalize 159 9.9 0.020 0.020 4.117 4.117 dbcsr_merge_all 91 11.1 0.066 0.066 3.970 3.970 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.016 81.018 81.019 qs_energies 1 2.0 0.000 0.001 80.617 80.618 scf_env_do_scf 1 3.0 0.000 0.000 75.502 75.503 scf_env_do_scf_inner_loop 15 4.0 0.001 0.002 75.502 75.503 qs_ks_update_qs_env 15 5.0 0.000 0.000 37.062 37.073 rebuild_ks_matrix 15 6.0 0.000 0.000 37.017 37.029 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.004 37.016 37.029 qs_rho_update_rho 16 5.0 0.000 0.000 22.841 22.846 calculate_rho_elec 16 6.0 0.011 0.012 22.841 22.845 sum_up_and_integrate 15 8.0 0.011 0.012 22.529 22.558 integrate_v_rspace 15 9.0 0.001 0.001 22.518 22.547 grid_collocate_task_list 16 7.0 21.083 21.699 21.083 21.699 grid_integrate_task_list 15 10.0 20.838 21.520 20.838 21.520 qs_scf_new_mos 15 5.0 0.001 0.001 16.134 16.201 eigensolver 15 6.0 0.002 0.003 14.838 14.849 qs_vxc_create 15 8.0 0.001 0.001 14.011 14.021 calculate_dispersion_nonloc 15 9.0 1.350 1.395 11.373 11.391 cp_fm_diag_elpa 15 7.0 0.000 0.000 10.856 10.862 cp_fm_diag_elpa_base 15 8.0 10.630 10.666 10.852 10.854 pw_transfer 1191 9.8 0.122 0.138 10.329 10.430 fft_wrap_pw1pw2 1086 10.9 0.019 0.020 10.063 10.183 fft3d_ps 1086 12.9 4.422 4.652 7.544 7.780 fft_wrap_pw1pw2_150 765 12.0 0.579 0.635 6.649 6.726 cp_fm_cholesky_restore 45 7.0 3.763 3.810 3.763 3.810 fft_wrap_pw1pw2_200 197 11.5 0.333 0.359 3.287 3.347 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.183 3.183 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.733 3.015 xc_vxc_pw_create 15 9.0 0.052 0.068 2.637 2.654 mp_alltoall_z22v 1086 14.9 1.853 2.428 1.853 2.428 rs_pw_transfer 158 9.4 0.002 0.002 1.695 2.125 vdW_energy 15 10.0 2.024 2.124 2.024 2.124 build_core_ppnl 1 5.0 1.825 2.034 1.825 2.034 x_to_yz 585 14.0 0.808 0.840 1.778 1.920 density_rs2pw 16 7.0 0.002 0.002 1.615 1.879 yz_to_x 501 13.7 0.432 0.520 1.315 1.677 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=67.00599999999999, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=45.866, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.564, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.05, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.556, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.203, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=8.985, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=20.282000000000004, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=10.63, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.083, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=20.838, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.763, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=4.422, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.084 0.084 298.466 298.466 qs_energies 1 2.0 0.000 0.000 298.304 298.304 ls_scf 1 3.0 0.000 0.000 296.399 296.399 ls_scf_main 1 4.0 0.002 0.002 284.069 284.069 density_matrix_trs4 11 5.0 0.011 0.011 170.732 170.732 dbcsr_multiply_generic 185 6.1 0.612 0.612 107.516 107.516 ls_scf_dm_to_ks 11 5.0 0.000 0.000 106.405 106.405 matrix_ls_to_qs 11 6.0 0.000 0.000 101.960 101.960 multiply_cannon 185 7.1 3.096 3.096 73.802 73.802 dbcsr_complete_redistribute 23 7.5 41.309 41.309 56.538 56.538 multiply_cannon_loop 185 8.1 0.386 0.386 52.872 52.872 matrix_decluster 11 7.0 0.000 0.000 51.522 51.522 multiply_cannon_multrec 185 9.1 50.798 50.798 50.850 50.850 dbcsr_copy_into_existing 11 7.0 50.437 50.437 50.437 50.437 arnoldi_extremal 12 6.1 0.000 0.000 44.668 44.668 arnoldi_normal_ev 12 7.1 0.027 0.027 44.668 44.668 build_subspace 23 8.1 0.131 0.131 44.019 44.019 dbcsr_matrix_vector_mult 652 9.0 0.249 0.249 34.259 34.259 dbcsr_matrix_vector_mult_local 652 10.0 32.727 32.727 32.736 32.736 make_m2s 370 7.1 0.030 0.030 27.544 27.544 make_images 370 8.1 7.239 7.239 25.219 25.219 dbcsr_finalize 646 7.5 0.193 0.193 20.487 20.487 dbcsr_merge_all 597 8.5 3.351 3.351 18.526 18.526 setup_rec_index_2d 370 8.1 17.688 17.688 17.688 17.688 dbcsr_sort_indices 1103 9.9 14.297 14.297 14.297 14.297 tree_to_linear_d 110 9.4 13.030 13.030 13.030 13.030 quick_finalize 395 10.0 0.449 0.449 12.172 12.172 ls_scf_init_scf 1 4.0 0.000 0.000 11.391 11.391 dbcsr_special_finalize 370 9.1 0.003 0.003 11.227 11.227 ls_scf_init_matrix_S 1 5.0 0.000 0.000 10.946 10.946 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 10.062 10.062 dbcsr_dot_sd 144 6.3 8.864 8.864 8.865 8.865 dbcsr_frobenius_norm 142 6.1 7.540 7.540 7.541 7.541 matrix_qs_to_ls 12 5.1 0.000 0.000 7.237 7.237 matrix_cluster 12 6.1 0.000 0.000 7.237 7.237 make_images_data 370 9.1 0.010 0.010 6.568 6.568 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.009 87.450 87.451 qs_energies 1 2.0 0.000 0.000 87.364 87.364 ls_scf 1 3.0 0.000 0.000 87.283 87.284 ls_scf_main 1 4.0 0.000 0.003 83.805 83.805 density_matrix_trs4 11 5.0 0.008 0.012 80.115 80.182 dbcsr_multiply_generic 185 6.1 0.075 0.090 75.295 75.528 multiply_cannon 185 7.1 0.038 0.041 62.779 63.546 multiply_cannon_loop 185 8.1 0.198 0.211 59.197 60.568 multiply_cannon_multrec 1480 9.1 39.005 40.871 39.460 41.335 mp_waitall_1 11936 10.3 17.861 20.542 17.861 20.542 multiply_cannon_metrocomm3 1480 9.1 0.017 0.019 10.409 13.813 make_m2s 370 7.1 0.032 0.036 8.540 8.650 make_images 370 8.1 0.687 0.731 8.424 8.537 multiply_cannon_metrocomm1 1480 9.1 0.010 0.012 4.405 6.089 calculate_norms 2960 9.1 4.654 4.934 4.654 4.934 make_images_data 370 9.1 0.012 0.013 3.410 3.752 arnoldi_extremal 12 6.1 0.000 0.001 3.506 3.513 arnoldi_normal_ev 12 7.1 0.002 0.007 3.506 3.513 mp_sum_l 1039 5.9 2.749 3.395 2.749 3.395 build_subspace 23 8.1 0.034 0.046 3.387 3.390 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.191 3.262 hybrid_alltoall_any 393 9.9 0.296 1.457 2.757 3.076 dbcsr_matrix_vector_mult 652 9.0 0.018 0.079 2.884 2.983 dbcsr_complete_redistribute 23 7.5 1.816 1.908 2.846 2.912 matrix_ls_to_qs 11 6.0 0.000 0.000 2.821 2.899 ls_scf_init_scf 1 4.0 0.000 0.000 2.661 2.662 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.622 2.632 matrix_decluster 11 7.0 0.000 0.000 2.556 2.629 make_images_pack 370 9.1 2.273 2.517 2.277 2.522 dbcsr_matrix_vector_mult_local 652 10.0 2.339 2.501 2.343 2.506 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.900 2.408 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.396 2.400 buffer_matrices_ensure_size 370 8.1 2.031 2.137 2.031 2.137 dbcsr_add_d 280 6.0 0.001 0.002 1.931 1.990 dbcsr_add_anytype 280 7.0 1.040 1.088 1.929 1.989 dbcsr_finalize 646 7.5 0.013 0.014 1.857 1.936 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=105.50700000000003, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=50.798, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=50.437, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=41.309, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=32.727, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=17.688, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=19.025999999999996, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=39.005, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.816, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.339, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.749, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=17.861, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.654, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.006 93.681 93.681 lib_test 1 2.0 0.000 0.000 93.674 93.674 dbcsr_run_tests 3 3.0 0.002 0.002 93.674 93.674 test_multiplies_multiproc 3 4.0 0.001 0.001 74.375 74.375 dbcsr_redistribute 9 5.0 47.328 47.328 50.724 50.724 dbcsr_multiply_generic 9 5.0 0.001 0.001 22.047 22.047 dbcsr_make_random_matrix 9 4.0 13.970 13.970 19.213 19.213 multiply_cannon 9 6.0 0.002 0.002 15.557 15.557 multiply_cannon_loop 9 7.0 0.003 0.003 15.051 15.051 multiply_cannon_multrec 9 8.0 15.047 15.047 15.047 15.047 dbcsr_finalize 27 5.7 0.004 0.004 9.021 9.021 dbcsr_merge_all 18 6.5 3.232 3.232 8.310 8.310 tree_to_linear_d 9 7.0 3.170 3.170 3.170 3.170 mp_alltoall_d11v 27 6.0 3.077 3.077 3.077 3.077 dbcsr_data_release 975 7.6 2.372 2.372 2.372 2.372 make_m2s 18 6.0 0.001 0.001 2.150 2.150 make_images 18 7.0 0.686 0.686 2.080 2.080 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 24.617 24.617 lib_test 1 2.0 0.000 0.000 24.587 24.608 dbcsr_run_tests 3 3.0 0.000 0.001 24.586 24.607 test_multiplies_multiproc 3 4.0 0.001 0.001 23.437 23.504 dbcsr_multiply_generic 9 5.0 0.001 0.001 21.621 21.722 multiply_cannon 9 6.0 0.002 0.002 19.411 19.935 multiply_cannon_loop 9 7.0 0.003 0.004 18.957 19.424 multiply_cannon_multrec 72 8.0 16.067 16.753 16.068 16.754 mp_waitall_1 576 9.2 3.271 3.886 3.271 3.886 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 2.557 3.169 dbcsr_make_random_matrix 9 4.0 0.871 0.886 1.109 1.135 mp_sum_l 310 2.7 0.548 1.037 0.548 1.037 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.544 1.033 make_m2s 18 6.0 0.001 0.001 0.885 0.987 make_images 18 7.0 0.026 0.027 0.882 0.984 dbcsr_finalize 27 5.7 0.000 0.001 0.848 0.918 dbcsr_merge_all 18 6.5 0.133 0.159 0.751 0.853 dbcsr_data_release 444 7.6 0.649 0.724 0.649 0.724 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.320 0.687 dbcsr_redistribute 9 5.0 0.376 0.442 0.638 0.676 dbcsr_destroy 111 5.9 0.004 0.054 0.558 0.636 make_images_data 18 8.0 0.001 0.001 0.439 0.575 dbcsr_data_copy_aa2 18 7.5 0.437 0.500 0.437 0.500 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=8.561999999999998, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=47.328, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=15.047, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=13.97, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.232, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=3.17, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.372, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.7020000000000017, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.376, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=16.067, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.871, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.133, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.649, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.271, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.548, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.045 0.045 134.781 134.781 qs_mol_dyn_low 1 2.0 0.004 0.004 132.812 132.812 velocity_verlet 5 3.0 0.004 0.004 106.808 106.808 qmmm_el_coupling 6 3.8 0.000 0.000 62.146 62.146 qmmm_elec_with_gaussian 6 4.8 0.189 0.189 62.140 62.140 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 61.163 61.163 qmmm_elec_gaussian_low_G 6 6.8 59.888 59.888 59.888 59.888 qs_forces 6 3.8 0.001 0.001 55.112 55.112 qs_energies 6 4.8 0.000 0.000 49.033 49.033 scf_env_do_scf 6 5.8 0.000 0.000 45.232 45.232 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 37.942 37.942 rebuild_ks_matrix 45 8.4 0.000 0.000 37.894 37.894 qs_ks_build_kohn_sham_matrix 45 9.4 0.007 0.007 37.894 37.894 qs_ks_update_qs_env 45 7.8 0.000 0.000 32.514 32.514 pw_transfer 966 11.9 0.066 0.066 22.747 22.747 fft_wrap_pw1pw2 801 13.0 0.008 0.008 22.426 22.426 fft_wrap_pw1pw2_150 507 14.3 2.354 2.354 21.919 21.919 qs_vxc_create 45 10.4 0.001 0.001 20.469 20.469 xc_vxc_pw_create 45 11.4 4.218 4.218 20.468 20.468 pw_scatter_s 429 15.4 10.102 10.102 10.102 10.102 qs_rho_update_rho 45 7.9 0.000 0.000 9.934 9.934 calculate_rho_elec 45 8.9 0.882 0.882 9.934 9.934 xc_rho_set_and_dset_create 45 12.4 0.238 0.238 9.397 9.397 fft3d_s 802 15.0 8.670 8.670 8.681 8.681 qmmm_forces 6 3.8 0.001 0.001 8.096 8.096 qmmm_forces_with_gaussian 6 4.8 0.124 0.124 7.600 7.600 init_scf_loop 6 6.8 0.000 0.000 7.285 7.285 pw_integral_ab 2539 7.4 7.224 7.224 7.224 7.224 fist_calc_energy_force 6 3.8 0.002 0.002 6.680 6.680 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.533 6.533 qs_ks_ddapc 45 10.4 0.001 0.001 6.400 6.400 force_nonbond 6 4.8 5.472 5.472 5.472 5.472 qmmm_forces_gaussian_low_G 6 6.8 5.419 5.419 5.419 5.419 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.391 5.391 pw_poisson_solve 51 9.9 2.200 2.200 5.079 5.079 grid_collocate_task_list 45 9.9 4.531 4.531 4.531 4.531 density_rs2pw 45 9.9 0.003 0.003 4.520 4.520 sum_up_and_integrate 45 10.4 0.225 0.225 4.242 4.242 integrate_v_rspace 45 11.4 0.012 0.012 4.017 4.017 cp_ddapc_apply_CD 45 11.4 0.006 0.006 3.966 3.966 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.037 83.448 83.450 qs_mol_dyn_low 1 2.0 0.004 0.005 81.910 82.003 qs_forces 6 3.8 0.001 0.001 60.174 60.174 qs_energies 6 4.8 0.001 0.001 57.326 57.326 scf_env_do_scf 6 5.8 0.000 0.001 55.860 55.860 scf_env_do_scf_inner_loop 113 6.2 0.003 0.009 53.603 53.605 rebuild_ks_matrix 119 8.1 0.000 0.001 39.484 39.502 qs_ks_build_kohn_sham_matrix 119 9.1 0.021 0.023 39.483 39.501 qs_ks_update_qs_env 119 7.3 0.001 0.001 37.086 37.103 velocity_verlet 5 3.0 0.002 0.003 34.452 34.457 pw_transfer 2446 11.8 0.277 0.298 24.985 25.210 fft_wrap_pw1pw2 2059 12.8 0.033 0.036 24.211 24.467 fft_wrap_pw1pw2_150 1321 14.0 2.171 2.339 23.542 23.726 qs_vxc_create 119 10.1 0.003 0.004 19.938 19.942 xc_vxc_pw_create 119 11.1 0.425 0.587 19.935 19.938 fft3d_ps 2059 14.8 10.954 11.836 18.257 18.690 qs_rho_update_rho 119 7.3 0.001 0.001 15.689 15.690 calculate_rho_elec 119 8.3 0.085 0.095 15.689 15.689 sum_up_and_integrate 119 10.1 0.084 0.091 14.292 14.325 integrate_v_rspace 119 11.1 0.005 0.006 14.207 14.243 qmmm_forces 6 3.8 0.003 0.003 12.081 12.082 qmmm_forces_with_gaussian 6 4.8 0.371 0.456 11.715 11.827 rs_pw_transfer 988 11.5 0.015 0.017 10.767 11.374 density_rs2pw 119 9.3 0.011 0.012 9.403 9.929 xc_rho_set_and_dset_create 119 12.1 0.501 0.588 9.490 9.858 qmmm_el_coupling 6 3.8 0.000 0.000 8.536 8.597 qmmm_elec_with_gaussian 6 4.8 0.333 0.429 8.533 8.594 potential_pw2rs 119 12.1 0.011 0.012 8.356 8.366 grid_collocate_task_list 119 9.3 6.033 6.440 6.033 6.440 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.738 5.836 mp_alltoall_z22v 2059 16.8 4.397 5.762 4.397 5.762 grid_integrate_task_list 119 12.1 5.500 5.708 5.500 5.708 qmmm_forces_gaussian_low_G 6 6.8 4.684 4.795 4.684 4.795 rs_pw_transfer_PW2RS_150 125 13.9 2.378 2.471 4.685 4.728 rs_pw_transfer_RS2PW_150 125 11.2 1.956 2.085 3.883 4.459 pw_restrict_s3 18 5.8 2.374 2.427 4.393 4.451 mp_waitany 4028 12.8 3.338 4.447 3.338 4.447 yz_to_x 964 15.3 1.072 1.244 3.223 4.198 x_to_yz 1095 16.3 1.786 1.934 4.032 4.174 qs_scf_new_mos 113 7.2 0.001 0.001 3.605 3.614 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.605 3.613 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.539 3.588 pw_prolongate_s3 18 6.8 1.888 1.910 3.539 3.588 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.354 3.458 ot_scf_mini 113 9.2 0.002 0.002 3.446 3.456 dbcsr_multiply_generic 2588 12.3 0.097 0.113 3.225 3.315 qs_ks_ddapc 119 10.1 0.003 0.003 2.747 2.890 pw_integral_ab 2761 7.7 2.096 2.120 2.431 2.605 qmmm_elec_gaussian_low_G 6 6.8 2.430 2.527 2.430 2.527 mp_sum_dm3 33 5.7 2.325 2.523 2.325 2.523 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.407 2.408 init_scf_loop 6 6.8 0.000 0.000 2.253 2.253 ot_mini 113 10.2 0.001 0.001 2.179 2.190 pw_gather_p 964 14.3 1.930 2.153 1.930 2.153 mp_waitall_1 188862 16.2 1.873 2.018 1.873 2.018 pw_derive 732 12.5 1.658 1.837 1.658 1.837 pw_scatter_p 1095 15.3 1.761 1.826 1.761 1.826 qs_ot_get_derivative 113 11.2 0.001 0.001 1.723 1.732 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=33.47500000000001, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=59.888, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.102, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.67, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=7.224, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=5.472, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.419, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.531, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=47.35399999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.43, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=2.096, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.684, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.033, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.397, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=10.954, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.5, yerr=0.0 Summary: Performance test took 56 minutes. Status: OK Removing intermediate container ff8881b3db7f ---> 0e0bc2b6d236 Step 41/41 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in fa3d0e6db32d Removing intermediate container fa3d0e6db32d ---> b7d16f108a73 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built b7d16f108a73 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-b51:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-02-27 14:49:46+00:00