StartDate: 2022-02-14 11:08:15+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: 2f1076a9662081232f36ba214e302baf46fa0acc CommitTime: 2022-02-13 22:06:01 +0100 CommitAuthor: Ole Schütt CommitSubject: Docker: Install CP2K from docker build context Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=2f1076a9662081232f36ba214e302baf46fa0acc Sending build context to Docker daemon 362.1MB Step 1/35 : FROM ubuntu:20.04 20.04: Pulling from library/ubuntu 08c01a0ec47e: Already exists Digest: sha256:669e010b58baf5beb2836b253c1fd5768333f0d1dbcb834f7c07a4dc93f474be Status: Downloaded newer image for ubuntu:20.04 ---> 54c9d81cbb44 Step 2/35 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 3a843b424587 Step 3/35 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 9621b4b5709d Step 4/35 : RUN ./install_requirements.sh ubuntu:20.04 ---> Using cache ---> c1cdc0430e60 Step 5/35 : RUN mkdir scripts ---> Using cache ---> 3877592c52c6 Step 6/35 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 924d174bd2f7 Step 7/35 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 7fb97c32b287 Step 8/35 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --dry-run ---> Using cache ---> bbadc4f47f53 Step 9/35 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 08941936a9ff Step 10/35 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 73ae15fda692 Step 11/35 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> fbfb1d7faaef Step 12/35 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 91a9df073cd3 Step 13/35 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> d9b1bbaa6218 Step 14/35 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> fc7b7ac22079 Step 15/35 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 86e93a8718fd Step 16/35 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 6a93f89b7991 Step 17/35 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> b17daab24faf Step 18/35 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> ac1bef8e032a Step 19/35 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 6cb4d58a875a Step 20/35 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 12378c7b2b39 Step 21/35 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 9c225a1f4125 Step 22/35 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> a6af2a002c66 Step 23/35 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> e2afafa819df Step 24/35 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 1bbfd22e66d4 Step 25/35 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> f57a4b8136ca Step 26/35 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 3636d4fc429b Step 27/35 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 818ee3b13ca9 Step 28/35 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 11b5bfefb83a Step 29/35 : WORKDIR /workspace ---> Running in d8ac54c78214 Removing intermediate container d8ac54c78214 ---> 6e0089ae1f2e Step 30/35 : COPY ./tools/docker/scripts/install_basics.sh . ---> 5b0866931735 Step 31/35 : RUN ./install_basics.sh ---> Running in ddfce0b9de4f Installing Ubuntu packages... debconf: delaying package configuration, since apt-utils is not installed Selecting previously unselected package libpopt0:amd64. (Reading database ... (Reading database ... 5% (Reading database ... 10% (Reading database ... 15% (Reading database ... 20% (Reading database ... 25% (Reading database ... 30% (Reading database ... 35% (Reading database ... 40% (Reading database ... 45% (Reading database ... 50% (Reading database ... 55% (Reading database ... 60% (Reading database ... 65% (Reading database ... 70% (Reading database ... 75% (Reading database ... 80% (Reading database ... 85% (Reading database ... 90% (Reading database ... 95% (Reading database ... 100% (Reading database ... 14726 files and directories currently installed.) Preparing to unpack .../libpopt0_1.16-14_amd64.deb ... Unpacking libpopt0:amd64 (1.16-14) ... Selecting previously unselected package rsync. Preparing to unpack .../rsync_3.1.3-8ubuntu0.1_amd64.deb ... Unpacking rsync (3.1.3-8ubuntu0.1) ... Setting up libpopt0:amd64 (1.16-14) ... Setting up rsync (3.1.3-8ubuntu0.1) ... invoke-rc.d: could not determine current runlevel invoke-rc.d: policy-rc.d denied execution of start. Processing triggers for libc-bin (2.31-0ubuntu9.2) ... done. Cloning cp2k repository... done. Removing intermediate container ddfce0b9de4f ---> 7d2f50d9a153 Step 32/35 : COPY ./tools/docker/scripts/install_performance.sh . ---> 2b6fafa67606 Step 33/35 : RUN ./install_performance.sh "local" ---> Running in c71d73e4af92 './local.pdbg' -> '/opt/cp2k-toolchain/install/arch/local.pdbg' './local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' './local.sdbg' -> '/opt/cp2k-toolchain/install/arch/local.sdbg' './local.ssmp' -> '/opt/cp2k-toolchain/install/arch/local.ssmp' './local_coverage.pdbg' -> '/opt/cp2k-toolchain/install/arch/local_coverage.pdbg' './local_static.psmp' -> '/opt/cp2k-toolchain/install/arch/local_static.psmp' './local_static.ssmp' -> '/opt/cp2k-toolchain/install/arch/local_static.ssmp' './local_warn.psmp' -> '/opt/cp2k-toolchain/install/arch/local_warn.psmp' Warming cache by trying to compile cp2k... done. Removing intermediate container c71d73e4af92 ---> 742ba609de6d Step 34/35 : COPY ./tools/docker/scripts/ci_entrypoint.sh ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 5c52940b0d53 Step 35/35 : CMD ["./ci_entrypoint.sh", "./test_performance.sh", "local"] ---> Running in d33ed9441c24 Removing intermediate container d33ed9441c24 ---> ccc81f3c2e44 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built ccc81f3c2e44 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-b51:master Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-b51:latest Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: 2f1076a9662081232f36ba214e302baf46fa0acc CommitTime: 2022-02-13 22:06:01 +0100 CommitAuthor: Ole Schütt CommitSubject: Docker: Install CP2K from docker build context ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 59 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 166.458 166.458 qs_mol_dyn_low 1 2.0 0.004 0.004 165.648 165.648 qs_forces 11 3.9 0.002 0.002 165.588 165.588 qs_energies 11 4.9 0.001 0.001 155.045 155.045 scf_env_do_scf 11 5.9 0.001 0.001 122.196 122.196 velocity_verlet 10 3.0 0.002 0.002 116.371 116.371 scf_env_do_scf_inner_loop 108 6.5 0.010 0.010 82.623 82.623 init_scf_loop 11 6.9 0.000 0.000 39.375 39.375 prepare_preconditioner 11 7.9 0.000 0.000 35.371 35.371 make_preconditioner 11 8.9 0.000 0.000 35.371 35.371 make_full_inverse_cholesky 11 9.9 0.000 0.000 33.322 33.322 rebuild_ks_matrix 119 8.3 0.001 0.001 32.755 32.755 qs_ks_build_kohn_sham_matrix 119 9.3 0.018 0.018 32.754 32.754 qs_ks_update_qs_env 119 7.6 0.001 0.001 30.588 30.588 qs_rho_update_rho 119 7.7 0.001 0.001 29.396 29.396 calculate_rho_elec 119 8.7 1.568 1.568 29.395 29.395 qs_scf_new_mos 108 7.5 0.001 0.001 28.794 28.794 qs_scf_loop_do_ot 108 8.5 0.001 0.001 28.793 28.793 ot_scf_mini 108 9.5 0.003 0.003 26.895 26.895 dbcsr_multiply_generic 2286 12.5 0.197 0.197 24.537 24.537 grid_collocate_task_list 119 9.7 23.077 23.077 23.077 23.077 sum_up_and_integrate 119 10.3 0.400 0.400 20.469 20.469 integrate_v_rspace 119 11.3 0.508 0.508 20.069 20.069 cp_fm_cholesky_invert 11 10.9 19.547 19.547 19.547 19.547 grid_integrate_task_list 119 12.3 17.009 17.009 17.009 17.009 init_scf_run 11 5.9 0.001 0.001 16.963 16.963 scf_env_initial_rho_setup 11 6.9 0.001 0.001 16.962 16.962 ot_mini 108 10.5 0.001 0.001 16.201 16.201 wfi_extrapolate 11 7.9 0.001 0.001 16.097 16.097 cp_gemm 81 9.0 0.000 0.000 15.556 15.556 cp_gemm_cosma 81 10.0 15.556 15.556 15.556 15.556 make_m2s 4572 13.5 0.067 0.067 13.830 13.830 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 10.820 10.820 qs_ot_get_derivative 108 11.5 0.001 0.001 8.427 8.427 pw_transfer 1439 11.6 0.096 0.096 8.034 8.034 ot_diis_step 108 11.5 0.006 0.006 7.770 7.770 fft_wrap_pw1pw2 1201 12.6 0.010 0.010 7.708 7.708 cp_fm_cholesky_decompose 22 10.9 7.495 7.495 7.495 7.495 make_images 4572 14.5 2.696 2.696 7.294 7.294 dbcsr_make_dense_low 5837 15.5 0.108 0.108 6.704 6.704 qs_ot_get_p 119 10.4 0.001 0.001 6.591 6.591 make_dense_data 5837 16.5 5.825 5.825 6.573 6.573 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.560 6.560 apply_single 119 13.6 0.001 0.001 6.560 6.560 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.554 6.554 fft_wrap_pw1pw2_140 487 13.2 0.700 0.700 6.541 6.541 dbcsr_complete_redistribute 329 12.2 3.028 3.028 6.413 6.413 dbcsr_copy 2102 12.0 0.295 0.295 6.047 6.047 dbcsr_make_images_dense 3978 14.8 0.027 0.027 6.025 6.025 qs_env_update_s_mstruct 11 6.9 0.000 0.000 5.898 5.898 multiply_cannon 2286 13.5 0.952 0.952 5.882 5.882 dbcsr_copy_into_existing 22 7.9 5.699 5.699 5.699 5.699 qs_create_task_list 11 7.9 0.000 0.000 5.340 5.340 generate_qs_task_list 11 8.9 3.687 3.687 5.340 5.340 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.238 5.238 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.898 4.898 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.898 4.898 qs_ot_p2m_diag 50 11.0 0.218 0.218 4.877 4.877 density_rs2pw 119 9.7 0.007 0.007 4.751 4.751 pw_poisson_solve 119 10.3 2.023 2.023 4.749 4.749 multiply_cannon_loop 2286 14.5 0.070 0.070 4.347 4.347 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.332 4.332 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.303 4.303 cp_dbcsr_syevd 50 12.0 0.004 0.004 4.286 4.286 multiply_cannon_multrec 2286 15.5 4.202 4.202 4.276 4.276 cp_fm_diag_elpa 50 13.0 0.000 0.000 4.121 4.121 cp_fm_diag_elpa_base 50 14.0 4.065 4.065 4.121 4.121 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.986 3.986 fft3d_s 1202 14.6 3.469 3.469 3.476 3.476 copy_fm_to_dbcsr 176 11.2 0.002 0.002 3.440 3.440 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.012 73.466 73.468 qs_mol_dyn_low 1 2.0 0.005 0.005 73.335 73.342 qs_forces 11 3.9 0.002 0.002 73.279 73.280 qs_energies 11 4.9 0.001 0.002 68.277 68.280 scf_env_do_scf 11 5.9 0.001 0.001 61.529 61.530 scf_env_do_scf_inner_loop 108 6.5 0.003 0.011 57.063 57.063 velocity_verlet 10 3.0 0.002 0.003 43.975 43.977 rebuild_ks_matrix 119 8.3 0.001 0.001 28.278 28.322 qs_ks_build_kohn_sham_matrix 119 9.3 0.021 0.022 28.277 28.321 qs_ks_update_qs_env 119 7.6 0.001 0.002 25.090 25.129 qs_rho_update_rho 119 7.7 0.001 0.001 22.631 22.648 calculate_rho_elec 119 8.7 0.048 0.050 22.630 22.647 sum_up_and_integrate 119 10.3 0.047 0.050 22.195 22.222 integrate_v_rspace 119 11.3 0.005 0.005 22.147 22.175 dbcsr_multiply_generic 2286 12.5 0.132 0.135 16.939 17.020 grid_collocate_task_list 119 9.7 15.937 16.725 15.937 16.725 grid_integrate_task_list 119 12.3 15.658 16.266 15.658 16.266 qs_scf_new_mos 108 7.5 0.001 0.001 13.967 14.018 qs_scf_loop_do_ot 108 8.5 0.001 0.001 13.966 14.017 ot_scf_mini 108 9.5 0.003 0.004 13.087 13.136 multiply_cannon 2286 13.5 0.236 0.247 11.165 11.414 multiply_cannon_loop 2286 14.5 0.225 0.243 10.054 10.350 mp_waitall_1 169478 16.3 8.501 9.042 8.501 9.042 ot_mini 108 10.5 0.001 0.001 7.732 7.783 rs_pw_transfer 974 11.9 0.016 0.017 7.078 7.758 density_rs2pw 119 9.7 0.009 0.009 6.081 6.781 pw_transfer 1439 11.6 0.153 0.167 5.857 5.915 multiply_cannon_metrocomm3 18288 15.5 0.082 0.087 5.432 5.862 fft_wrap_pw1pw2 1201 12.6 0.014 0.015 5.548 5.603 potential_pw2rs 119 12.3 0.010 0.012 5.221 5.231 fft_wrap_pw1pw2_140 487 13.2 0.565 0.589 4.775 4.908 init_scf_run 11 5.9 0.000 0.002 4.641 4.641 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.641 4.641 init_scf_loop 11 6.9 0.000 0.001 4.448 4.449 wfi_extrapolate 11 7.9 0.001 0.001 4.255 4.256 fft3d_ps 1201 14.6 2.235 2.350 4.103 4.178 make_m2s 4572 13.5 0.075 0.077 3.945 4.003 qs_ot_get_derivative 108 11.5 0.001 0.001 3.907 3.954 ot_diis_step 108 11.5 0.005 0.005 3.784 3.785 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.699 3.762 apply_single 119 13.6 0.001 0.001 3.698 3.761 multiply_cannon_multrec 18288 15.5 3.434 3.602 3.452 3.620 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.410 3.416 make_images 4572 14.5 0.188 0.194 3.258 3.320 mp_waitany 9880 13.7 2.593 3.258 2.593 3.258 rs_pw_transfer_RS2PW_140 130 11.5 0.480 0.501 2.303 2.994 rs_pw_transfer_PW2RS_140 130 13.9 1.255 1.330 2.617 2.642 mp_alltoall_d11v 2130 13.8 1.560 2.070 1.560 2.070 qs_ot_get_p 119 10.4 0.001 0.001 1.871 1.914 cp_gemm 81 9.0 0.000 0.000 1.726 1.730 cp_gemm_cosma 81 10.0 1.726 1.730 1.726 1.730 rs_gather_matrices 119 12.3 0.131 0.143 1.211 1.725 make_images_data 4572 15.5 0.061 0.069 1.484 1.585 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.429 1.537 prepare_preconditioner 11 7.9 0.000 0.000 1.517 1.526 make_preconditioner 11 8.9 0.000 0.000 1.517 1.526 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=79.57199999999999, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.077, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=19.547, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=17.009, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.556, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.495, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.202, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=25.617000000000004, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.937, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.658, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.726, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.434, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.501, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.593, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.039 0.039 218.605 218.605 qs_mol_dyn_low 1 2.0 0.004 0.004 217.779 217.779 qs_forces 11 3.9 0.002 0.002 217.720 217.720 qs_energies 11 4.9 0.001 0.001 203.599 203.599 scf_env_do_scf 11 5.9 0.001 0.001 166.533 166.533 velocity_verlet 10 3.0 0.002 0.002 148.079 148.079 scf_env_do_scf_inner_loop 96 6.5 0.009 0.009 123.643 123.643 rebuild_ks_matrix 107 8.3 0.001 0.001 62.203 62.203 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.017 62.203 62.203 qs_ks_update_qs_env 107 7.6 0.001 0.001 56.145 56.145 qs_rho_update_rho 107 7.7 0.001 0.001 55.140 55.140 calculate_rho_elec 107 8.7 1.389 1.389 55.140 55.140 sum_up_and_integrate 107 10.3 0.370 0.370 51.243 51.243 integrate_v_rspace 107 11.3 0.438 0.438 50.873 50.873 grid_collocate_task_list 107 9.7 49.538 49.538 49.538 49.538 grid_integrate_task_list 107 12.3 48.139 48.139 48.139 48.139 init_scf_loop 11 6.9 0.000 0.000 42.664 42.664 prepare_preconditioner 11 7.9 0.000 0.000 35.320 35.320 make_preconditioner 11 8.9 0.000 0.000 35.320 35.320 make_full_inverse_cholesky 11 9.9 0.000 0.000 33.315 33.315 qs_scf_new_mos 96 7.5 0.001 0.001 24.795 24.795 qs_scf_loop_do_ot 96 8.5 0.001 0.001 24.794 24.794 ot_scf_mini 96 9.5 0.003 0.003 23.118 23.118 dbcsr_multiply_generic 1966 12.4 0.174 0.174 20.924 20.924 init_scf_run 11 5.9 0.001 0.001 20.001 20.001 scf_env_initial_rho_setup 11 6.9 0.001 0.001 20.000 20.000 cp_fm_cholesky_invert 11 10.9 19.538 19.538 19.538 19.538 wfi_extrapolate 11 7.9 0.001 0.001 18.879 18.879 cp_gemm 81 9.0 0.000 0.000 15.521 15.521 cp_gemm_cosma 81 10.0 15.520 15.520 15.520 15.520 ot_mini 96 10.5 0.001 0.001 13.696 13.696 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.915 11.915 make_m2s 3932 13.4 0.058 0.058 11.590 11.590 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.733 7.733 cp_fm_cholesky_decompose 22 10.9 7.529 7.529 7.529 7.529 qs_ot_get_derivative 96 11.5 0.001 0.001 7.486 7.486 pw_transfer 1295 11.6 0.086 0.086 7.127 7.127 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.065 7.065 fft_wrap_pw1pw2 1081 12.6 0.009 0.009 6.836 6.836 qs_create_task_list 11 7.9 0.000 0.000 6.516 6.516 generate_qs_task_list 11 8.9 4.821 4.821 6.516 6.516 dbcsr_complete_redistribute 317 12.2 3.027 3.027 6.472 6.472 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.385 6.385 make_images 3932 14.4 2.308 2.308 6.237 6.237 ot_diis_step 96 11.5 0.005 0.005 6.206 6.206 qs_ot_get_p 107 10.4 0.001 0.001 5.919 5.919 fft_wrap_pw1pw2_140 439 13.2 0.571 0.571 5.786 5.786 dbcsr_copy 1855 11.9 0.269 0.269 5.743 5.743 dbcsr_make_dense_low 4961 15.5 0.091 0.091 5.515 5.515 dbcsr_copy_into_existing 22 7.9 5.422 5.422 5.422 5.422 make_dense_data 4961 16.5 4.796 4.796 5.405 5.405 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.309 5.309 apply_single 107 13.6 0.000 0.000 5.309 5.309 copy_dbcsr_to_fm 147 11.2 0.003 0.003 5.299 5.299 multiply_cannon 1966 13.4 0.778 0.778 5.220 5.220 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 4.975 4.975 calculate_w_matrix_ot 11 6.9 0.008 0.008 4.975 4.975 dbcsr_make_images_dense 3386 14.7 0.023 0.023 4.911 4.911 qs_ot_p2m_diag 44 11.0 0.183 0.183 4.495 4.495 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.011 130.273 130.274 qs_mol_dyn_low 1 2.0 0.005 0.005 130.153 130.159 qs_forces 11 3.9 0.002 0.002 130.096 130.097 qs_energies 11 4.9 0.001 0.001 121.184 121.186 scf_env_do_scf 11 5.9 0.001 0.001 111.325 111.327 scf_env_do_scf_inner_loop 96 6.5 0.003 0.010 103.405 103.406 velocity_verlet 10 3.0 0.002 0.002 77.822 77.823 rebuild_ks_matrix 107 8.3 0.001 0.001 59.271 59.308 qs_ks_build_kohn_sham_matrix 107 9.3 0.020 0.021 59.270 59.307 sum_up_and_integrate 107 10.3 0.043 0.046 53.703 53.751 integrate_v_rspace 107 11.3 0.004 0.005 53.660 53.707 qs_ks_update_qs_env 107 7.6 0.001 0.001 52.159 52.192 qs_rho_update_rho 107 7.7 0.001 0.001 49.926 49.944 calculate_rho_elec 107 8.7 0.043 0.045 49.925 49.943 grid_integrate_task_list 107 12.3 46.614 47.939 46.614 47.939 grid_collocate_task_list 107 9.7 42.876 44.190 42.876 44.190 dbcsr_multiply_generic 1966 12.4 0.116 0.118 15.299 15.507 qs_scf_new_mos 96 7.5 0.001 0.001 12.439 12.470 qs_scf_loop_do_ot 96 8.5 0.001 0.001 12.438 12.469 ot_scf_mini 96 9.5 0.003 0.003 11.655 11.685 multiply_cannon 1966 13.4 0.205 0.209 10.109 10.327 rs_pw_transfer 878 11.9 0.015 0.016 7.645 9.534 multiply_cannon_loop 1966 14.4 0.199 0.211 9.108 9.278 density_rs2pw 107 9.7 0.008 0.009 6.500 8.393 mp_waitall_1 146670 16.2 7.722 8.111 7.722 8.111 init_scf_loop 11 6.9 0.000 0.001 7.901 7.902 init_scf_run 11 5.9 0.000 0.002 7.727 7.727 scf_env_initial_rho_setup 11 6.9 0.000 0.001 7.726 7.727 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.324 7.330 wfi_extrapolate 11 7.9 0.001 0.001 7.088 7.088 ot_mini 96 10.5 0.001 0.001 6.854 6.885 mp_waitany 8968 13.7 3.483 5.416 3.483 5.416 pw_transfer 1295 11.6 0.137 0.146 5.301 5.367 multiply_cannon_metrocomm3 15728 15.4 0.072 0.075 4.925 5.312 fft_wrap_pw1pw2 1081 12.6 0.013 0.013 5.021 5.080 rs_pw_transfer_RS2PW_140 118 11.5 0.412 0.429 3.136 5.024 potential_pw2rs 107 12.3 0.009 0.010 4.897 4.908 fft_wrap_pw1pw2_140 439 13.2 0.507 0.535 4.318 4.469 mp_alltoall_d11v 1998 13.7 2.419 4.399 2.419 4.399 rs_gather_matrices 107 12.3 0.120 0.132 2.092 4.041 fft3d_ps 1081 14.6 2.022 2.146 3.710 3.768 make_m2s 3932 13.4 0.065 0.067 3.529 3.582 qs_ot_get_derivative 96 11.5 0.001 0.001 3.442 3.474 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.351 3.392 apply_single 107 13.6 0.001 0.001 3.351 3.392 ot_diis_step 96 11.5 0.004 0.005 3.378 3.378 multiply_cannon_multrec 15728 15.4 3.116 3.266 3.131 3.281 make_images 3932 14.4 0.165 0.169 2.924 2.987 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=78.34100000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=49.538, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.139, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=19.538, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=15.52, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.529, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=26.462000000000003, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=42.876, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.614, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.483, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.116, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.722, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.415 0.415 282.934 282.934 qs_energies 1 2.0 0.000 0.000 281.629 281.629 scf_env_do_scf 1 3.0 0.000 0.000 279.216 279.216 qs_ks_update_qs_env 8 5.0 0.000 0.000 260.988 260.988 rebuild_ks_matrix 7 6.0 0.000 0.000 260.882 260.882 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 260.882 260.882 hfx_ks_matrix 7 8.0 0.000 0.000 174.843 174.843 integrate_four_center 7 9.0 2.298 2.298 174.812 174.812 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 162.557 162.557 integrate_four_center_main 7 10.0 1.008 1.008 161.239 161.239 integrate_four_center_bin 457 11.0 160.231 160.231 160.231 160.231 init_scf_loop 1 4.0 0.000 0.000 116.645 116.645 cp_gemm 129 10.3 0.000 0.000 70.829 70.829 cp_gemm_cosma 129 11.3 70.829 70.829 70.829 70.829 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 40.528 40.528 admm_fit_mo_coeffs 7 9.0 0.000 0.000 38.691 38.691 admm_mo_merge_derivs 7 8.0 0.000 0.000 36.545 36.545 merge_mo_derivs_diag 7 9.0 0.022 0.022 36.545 36.545 purify_mo_diag 7 10.0 0.001 0.001 23.145 23.145 fit_mo_coeffs 7 10.0 0.000 0.000 15.546 15.546 prepare_preconditioner 1 5.0 0.000 0.000 14.447 14.447 make_preconditioner 1 6.0 0.000 0.000 14.447 14.447 integrate_four_center_load 7 10.0 0.000 0.000 10.887 10.887 hfx_load_balance 1 11.0 0.003 0.003 10.887 10.887 arnoldi_normal_ev 11 9.3 0.002 0.002 8.759 8.759 estimate_cond_num 1 7.0 0.000 0.000 8.681 8.681 build_subspace 28 9.5 0.014 0.014 8.654 8.654 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.217 0.223 192.334 192.335 qs_energies 1 2.0 0.000 0.000 191.972 191.972 scf_env_do_scf 1 3.0 0.000 0.000 191.393 191.394 qs_ks_update_qs_env 8 5.0 0.000 0.000 188.326 188.327 rebuild_ks_matrix 7 6.0 0.000 0.000 188.312 188.313 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 188.312 188.313 hfx_ks_matrix 7 8.0 0.000 0.001 176.099 176.101 integrate_four_center 7 9.0 0.102 0.413 176.083 176.084 integrate_four_center_main 7 10.0 0.005 0.005 158.821 163.420 integrate_four_center_bin 448 11.0 158.817 163.416 158.817 163.416 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 110.268 110.269 init_scf_loop 1 4.0 0.000 0.000 81.124 81.124 integrate_four_center_load 7 10.0 0.000 0.000 10.825 10.838 hfx_load_balance 1 11.0 0.001 0.001 10.825 10.838 mp_sync 70 11.3 5.588 10.127 5.588 10.127 hfx_load_balance_count 1 12.0 5.284 5.457 5.284 5.457 hfx_load_balance_bin 1 12.0 5.290 5.402 5.290 5.402 cp_gemm 129 10.3 0.000 0.001 4.939 4.945 cp_gemm_cosma 129 11.3 4.939 4.944 4.939 4.944 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=48.15300000000002, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=160.231, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=70.829, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.298, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=1.008, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.415, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=12.092000000000013, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=158.817, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=4.939, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.102, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.217, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.284, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.29, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=5.588, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 381.536 381.536 qs_energies 1 2.0 0.000 0.000 380.975 380.975 mp2_main 1 3.0 0.000 0.000 374.550 374.550 mp2_gpw_main 1 4.0 0.000 0.000 374.200 374.200 rpa_ri_compute_en 1 5.0 0.000 0.000 361.183 361.183 rpa_num_int 1 6.0 0.001 0.001 361.161 361.161 compute_mat_P_omega 1 7.0 0.002 0.002 179.555 179.555 compute_mat_P_omega_contract 10 8.0 12.413 12.413 177.925 177.925 dbt_total 2336 9.6 0.018 0.018 170.469 170.469 cp_gemm 105 8.4 0.000 0.000 152.211 152.211 cp_gemm_cosma 105 9.4 152.210 152.210 152.210 152.210 GW_matrix_operations 10 7.0 0.005 0.005 107.728 107.728 dbt_contract 787 11.0 49.286 49.286 105.171 105.171 compute_mat_P_omega_calc_M_occ 250 9.0 12.388 12.388 64.478 64.478 dbt_copy 1103 10.7 21.190 21.190 63.973 63.973 dbt_tas_total 1149 12.2 0.373 0.373 52.697 52.697 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 51.606 51.606 dbt_tas_multiply 807 12.1 0.004 0.004 51.400 51.400 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 49.659 49.659 dbt_tas_dbm 807 14.1 0.005 0.005 41.276 41.276 dbm_multiply 807 16.1 41.264 41.264 41.264 41.264 compute_mat_P_omega_calc_M_vir 250 9.0 0.002 0.002 39.832 39.832 dbt_tas_mm_1N 524 15.1 0.002 0.002 25.282 25.282 dbt_tas_copy 574 11.4 17.094 17.094 20.491 20.491 compute_QP_energies 1 7.0 0.000 0.000 20.290 20.290 compute_self_energy_cubic_gw 1 8.0 0.095 0.095 20.290 20.290 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 18.551 18.551 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 15.281 15.281 dbt_tas_mm_2 251 15.0 0.002 0.002 14.200 14.200 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 13.128 13.128 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 13.002 13.002 dbt_copy_nocomm 251 12.0 11.408 11.408 13.001 13.001 dbt_tas_reserve_blocks_index 3261 13.7 7.678 7.678 12.969 12.969 dbt_reserve_blocks_index 2280 12.5 1.649 1.649 10.870 10.870 dbt_reserve_blocks_index_array 2222 11.6 0.011 0.011 10.826 10.826 cp_fm_cholesky_invert 10 8.0 9.241 9.241 9.241 9.241 contract_cubic_gw 21 9.0 0.000 0.000 8.749 8.749 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.008 55.405 55.406 qs_energies 1 2.0 0.001 0.001 55.289 55.291 mp2_main 1 3.0 0.000 0.001 53.797 53.799 mp2_gpw_main 1 4.0 0.000 0.001 53.738 53.740 rpa_ri_compute_en 1 5.0 0.000 0.000 51.779 51.781 rpa_num_int 1 6.0 0.001 0.001 51.771 51.773 dbt_total 2336 9.6 0.018 0.020 39.331 39.335 compute_mat_P_omega 1 7.0 0.001 0.002 38.267 38.275 compute_mat_P_omega_contract 10 8.0 0.715 0.733 37.929 37.936 dbt_contract 787 11.0 1.839 2.033 28.704 28.715 dbt_tas_total 1149 12.2 0.082 0.090 24.997 24.997 dbt_tas_multiply 807 12.1 0.003 0.004 24.911 24.914 dbt_tas_dbm 807 14.1 0.005 0.006 17.136 17.138 dbm_multiply 807 16.1 13.699 14.555 13.699 14.555 compute_mat_P_omega_calc_M_occ 250 9.0 0.695 0.718 12.613 12.614 dbt_copy 1111 10.7 4.001 4.260 9.104 9.437 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.370 9.371 cp_gemm 105 8.4 0.000 0.000 8.981 8.994 cp_gemm_cosma 105 9.4 8.981 8.994 8.981 8.994 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 7.882 7.883 dbt_tas_mm_1N 524 15.1 0.002 0.003 6.877 7.463 dbt_tas_mm_2 251 15.0 0.002 0.003 7.269 7.270 mp_sync 8706 11.6 6.265 7.267 6.265 7.267 GW_matrix_operations 10 7.0 0.001 0.002 5.759 5.766 compute_QP_energies 1 7.0 0.000 0.000 4.201 4.201 compute_self_energy_cubic_gw 1 8.0 0.005 0.005 4.198 4.201 dbt_communicate_buffer 1098 11.7 0.096 0.103 3.639 3.766 mp_waitall_2 3776 14.7 3.468 3.715 3.468 3.715 contract_cubic_gw 21 9.0 0.000 0.000 3.169 3.169 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.167 3.167 contract_P_omega_with_mat_L 10 8.0 0.000 0.001 3.039 3.041 dbt_reserve_blocks_index 2849 12.4 0.108 0.117 2.533 2.835 dbt_reserve_blocks_index_array 2791 11.4 0.013 0.016 2.532 2.834 dbt_tas_reserve_blocks_index 3300 13.8 0.265 0.298 2.486 2.781 dbm_reserve_blocks 3696 14.8 2.293 2.580 2.293 2.580 dbt_tas_replicate 396 14.1 1.204 1.454 2.291 2.335 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.954 1.957 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 1.711 1.715 convert_to_new_pgrid 2421 14.1 0.038 0.043 1.614 1.700 dbm_copy 1608 15.1 1.566 1.657 1.566 1.657 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.518 1.522 scf_env_do_scf 1 3.0 0.000 0.000 1.433 1.433 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 1.433 1.433 mp_max_i 1992 9.8 1.032 1.285 1.032 1.285 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=100.49199999999996, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=152.21, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_contract", label="dbt_contract", y=49.286, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=41.264, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_copy", label="dbt_copy", y=21.19, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=17.094, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=17.152, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=8.981, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_contract", label="dbt_contract", y=1.839, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=13.699, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_copy", label="dbt_copy", y=4.001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.468, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.265, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.020 708.532 708.532 qs_forces 1 2.0 0.000 0.000 707.741 707.741 rebuild_ks_matrix 7 6.6 0.000 0.000 698.272 698.272 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 698.272 698.272 hfx_ks_matrix 7 8.6 0.000 0.000 695.308 695.308 dbt_total 1051 10.5 0.010 0.010 570.459 570.459 hfx_ri_update_ks 7 9.6 0.000 0.000 381.437 381.437 hfx_ri_update_ks_Pmat 7 10.6 63.668 63.668 381.430 381.430 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 362.651 362.651 dbt_contract 283 11.7 121.150 121.150 359.948 359.948 qs_energies 1 3.0 0.000 0.000 345.016 345.016 scf_env_do_scf 1 4.0 0.000 0.000 344.596 344.596 qs_ks_update_qs_env 8 6.0 0.000 0.000 335.627 335.627 hfx_ri_update_forces 1 7.0 0.024 0.024 313.865 313.865 dbt_tas_total 533 12.7 3.025 3.025 226.103 226.103 dbt_tas_multiply 292 12.8 0.001 0.001 221.401 221.401 dbt_copy 444 11.5 100.782 100.782 205.795 205.795 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 200.599 200.599 dbt_tas_dbm 292 14.8 0.002 0.002 197.748 197.748 dbm_multiply 292 16.8 197.743 197.743 197.743 197.743 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 166.703 166.703 init_scf_loop 2 5.0 0.000 0.000 143.995 143.995 dbt_tas_mm_2 119 16.0 0.001 0.001 110.236 110.236 hfx_ri_forces_Pmat_2c_inv_2 9 8.0 0.000 0.000 58.321 58.321 hfx_ri_forces_Pmat_metric 9 8.0 0.001 0.001 55.141 55.141 precalc_derivatives 1 8.0 0.008 0.008 54.826 54.826 dbt_tas_mm_3N 94 14.7 0.000 0.000 54.734 54.734 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 45.678 45.678 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 44.696 44.696 dbt_tas_reserve_blocks_index 1569 14.6 16.568 16.568 38.342 38.342 dbt_tas_copy 287 12.3 28.674 28.674 36.555 36.555 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 34.896 34.896 dbt_reserve_blocks_index 1020 13.6 3.590 3.590 33.623 33.623 dbt_reserve_blocks_index_array 999 12.6 0.008 0.008 33.327 33.327 dbt_tas_mm_3T 77 17.1 0.000 0.000 32.595 32.595 hfx_ri_forces_Pmat_PQ_der 9 8.0 1.410 1.410 30.482 30.482 hfx_ri_forces_Pmat_Pmat_2 9 8.0 0.000 0.000 28.967 28.967 build_3c_derivatives 2 9.0 1.361 1.361 27.690 27.690 hfx_ri_forces_Pmat_2c_inv_1 1 8.0 4.288 4.288 27.310 27.310 get_force_from_3c_trace 18 8.0 25.505 25.505 25.505 25.505 dbt_split_blocks_generic 138 11.7 17.076 17.076 20.774 20.774 dbt_communicate_buffer 157 12.8 20.769 20.769 20.769 20.769 dbt_split_copyback 69 11.7 16.503 16.503 18.733 18.733 dbm_reserve_blocks 1813 15.4 18.038 18.038 18.038 18.038 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.011 108.354 108.355 qs_forces 1 2.0 0.000 0.000 108.154 108.154 rebuild_ks_matrix 7 6.6 0.000 0.000 107.007 107.009 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.003 107.007 107.009 hfx_ks_matrix 7 8.6 0.000 0.001 105.310 105.310 dbt_total 1051 10.5 0.010 0.012 96.739 96.740 dbt_contract 283 11.7 5.121 5.407 76.166 76.188 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 71.926 71.927 dbt_tas_total 533 12.7 0.135 0.360 69.865 69.866 hfx_ri_update_forces 1 7.0 0.003 0.003 67.032 67.033 dbt_tas_multiply 292 12.8 0.001 0.002 66.739 66.741 dbt_tas_dbm 292 14.8 0.002 0.003 51.284 51.285 dbm_multiply 292 16.8 45.751 47.704 45.751 47.704 hfx_ri_update_ks 7 9.6 0.000 0.000 38.276 38.276 hfx_ri_update_ks_Pmat 7 10.6 2.546 2.742 38.275 38.275 qs_energies 1 3.0 0.000 0.000 36.210 36.210 scf_env_do_scf 1 4.0 0.000 0.000 35.988 35.988 qs_ks_update_qs_env 8 6.0 0.000 0.000 35.082 35.083 dbt_tas_mm_2 119 16.0 0.001 0.002 22.102 22.102 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 20.420 20.420 hfx_ri_forces_Pmat_metric 9 8.0 0.001 0.001 20.088 20.088 dbt_copy 464 11.6 5.933 6.120 16.519 16.880 init_scf_loop 2 5.0 0.000 0.000 15.567 15.567 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 14.741 14.741 dbt_tas_mm_3N 94 14.7 0.001 0.001 13.032 13.724 dbt_tas_mm_3T 77 17.1 0.000 0.001 12.964 13.585 mp_sync 3797 12.3 9.844 12.887 9.844 12.887 hfx_ri_forces_Pmat_2c_inv_2 9 8.0 0.000 0.001 12.389 12.389 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 9.936 9.936 hfx_ri_forces_Pmat_PQ_der 9 8.0 0.068 0.072 9.633 9.633 hfx_ri_forces_Pmat_Pmat_2 9 8.0 0.000 0.000 8.347 8.370 dbt_tas_reserve_blocks_index 1602 14.7 0.571 0.607 6.696 7.154 dbm_reserve_blocks 1848 15.6 6.510 7.119 6.510 7.119 mp_waitall_2 1484 15.5 6.400 6.866 6.400 6.866 dbt_reserve_blocks_index 1191 13.5 0.166 0.176 5.730 6.165 dbt_reserve_blocks_index_array 1170 12.5 0.009 0.010 5.691 6.123 precalc_derivatives 1 8.0 0.003 0.003 5.962 5.962 dbt_tas_replicate 246 14.5 1.666 2.544 5.362 5.783 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 4.753 4.753 hfx_ri_forces_Pmat_2c_inv_1 1 8.0 0.160 0.169 4.718 4.722 convert_to_new_pgrid 876 14.8 0.034 0.040 3.830 4.497 dbm_copy 578 15.8 3.761 4.429 3.761 4.429 dbt_tas_communicate_buffer 498 15.8 0.021 0.025 3.450 3.853 dbt_communicate_buffer 328 12.3 0.023 0.025 3.334 3.453 build_3c_derivatives 2 9.0 0.609 0.665 3.320 3.334 dbt_tas_replicate_communicate_ 127 15.0 0.003 0.005 2.738 3.113 hfx_ri_forces_Pmat_Pmat_1 3 8.0 0.000 0.000 2.740 2.740 dbt_tas_copy 141 13.3 1.391 1.469 2.441 2.622 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 2.555 2.567 mp_sum_l 8004 13.7 1.691 2.359 1.691 2.359 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 2.254 2.255 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=178.47699999999998, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=197.743, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_contract", label="dbt_contract", y=121.15, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_copy", label="dbt_copy", y=100.782, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=63.668, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=28.674, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=18.038, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=24.858000000000004, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=45.751, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_contract", label="dbt_contract", y=5.121, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_copy", label="dbt_copy", y=5.933, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=2.546, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=1.391, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=6.51, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=9.844, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=6.4, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.099 0.099 195.385 195.385 qs_energies 1 2.0 0.000 0.000 193.576 193.576 scf_env_do_scf 1 3.0 0.000 0.000 183.323 183.323 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 183.323 183.323 qs_scf_new_mos 15 5.0 0.000 0.000 80.945 80.945 qs_ks_update_qs_env 15 5.0 0.000 0.000 70.960 70.960 rebuild_ks_matrix 15 6.0 0.000 0.000 70.590 70.590 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 70.590 70.590 eigensolver 15 6.0 0.002 0.002 67.208 67.208 cp_fm_diag_elpa 15 7.0 0.000 0.000 52.421 52.421 cp_fm_diag_elpa_base 15 8.0 47.665 47.665 52.421 52.421 qs_vxc_create 15 8.0 0.040 0.040 46.431 46.431 calculate_dispersion_nonloc 15 9.0 9.187 9.187 40.344 40.344 pw_transfer 1191 9.8 0.097 0.097 27.680 27.680 fft_wrap_pw1pw2 1086 10.9 0.013 0.013 27.381 27.381 qs_rho_update_rho 16 5.0 0.000 0.000 25.053 25.053 calculate_rho_elec 16 6.0 0.344 0.344 25.053 25.053 grid_collocate_task_list 16 7.0 23.463 23.463 23.463 23.463 sum_up_and_integrate 15 8.0 0.082 0.082 22.513 22.513 integrate_v_rspace 15 9.0 0.033 0.033 22.431 22.431 grid_integrate_task_list 15 10.0 21.776 21.776 21.776 21.776 fft_wrap_pw1pw2_150 765 12.0 3.502 3.502 20.821 20.821 fft3d_s 1087 12.8 11.301 11.301 11.313 11.313 copy_dbcsr_to_fm 16 5.9 0.001 0.001 10.889 10.889 pw_scatter_s 585 13.0 10.799 10.799 10.799 10.799 cp_fm_cholesky_restore 45 7.0 10.294 10.294 10.294 10.294 dbcsr_complete_redistribute 46 8.3 3.530 3.530 9.720 9.720 cp_fm_upper_to_full 30 8.0 9.246 9.246 9.246 9.246 vdW_energy 15 10.0 7.931 7.931 7.931 7.931 gspace_mixing 14 5.0 0.273 0.273 7.674 7.674 broyden_mixing 14 6.0 6.923 6.923 6.923 6.923 fft_wrap_pw1pw2_200 197 11.5 0.357 0.357 6.289 6.289 xc_vxc_pw_create 15 9.0 1.668 1.668 6.047 6.047 init_scf_run 1 3.0 0.000 0.000 4.765 4.765 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.658 4.658 dbcsr_finalize 159 9.9 0.020 0.020 4.250 4.250 dbcsr_merge_all 91 11.1 0.077 0.077 4.096 4.096 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.016 87.734 87.736 qs_energies 1 2.0 0.000 0.000 87.355 87.355 scf_env_do_scf 1 3.0 0.000 0.000 82.176 82.176 scf_env_do_scf_inner_loop 15 4.0 0.001 0.002 82.176 82.176 qs_ks_update_qs_env 15 5.0 0.000 0.000 39.926 39.947 rebuild_ks_matrix 15 6.0 0.000 0.000 39.875 39.897 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.004 39.875 39.896 qs_rho_update_rho 16 5.0 0.000 0.000 23.637 23.643 calculate_rho_elec 16 6.0 0.011 0.012 23.637 23.642 sum_up_and_integrate 15 8.0 0.014 0.016 23.481 23.519 integrate_v_rspace 15 9.0 0.001 0.001 23.467 23.506 grid_collocate_task_list 16 7.0 21.716 22.378 21.716 22.378 grid_integrate_task_list 15 10.0 21.472 22.345 21.472 22.345 qs_scf_new_mos 15 5.0 0.001 0.001 19.022 19.146 eigensolver 15 6.0 0.002 0.002 17.493 17.507 qs_vxc_create 15 8.0 0.001 0.001 15.857 15.875 calculate_dispersion_nonloc 15 9.0 1.415 1.545 12.910 12.925 cp_fm_diag_elpa 15 7.0 0.000 0.000 12.708 12.715 cp_fm_diag_elpa_base 15 8.0 12.443 12.482 12.703 12.708 pw_transfer 1191 9.8 0.136 0.142 12.148 12.263 fft_wrap_pw1pw2 1086 10.9 0.020 0.023 11.856 11.989 fft3d_ps 1086 12.9 5.107 5.281 8.990 9.236 fft_wrap_pw1pw2_150 765 12.0 0.646 0.674 7.959 8.047 cp_fm_cholesky_restore 45 7.0 4.541 4.600 4.541 4.600 fft_wrap_pw1pw2_200 197 11.5 0.372 0.397 3.734 3.815 qs_energies_init_hamiltonians 1 3.0 0.000 0.001 3.224 3.224 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.763 3.051 mp_alltoall_z22v 1086 14.9 2.471 2.974 2.471 2.974 xc_vxc_pw_create 15 9.0 0.061 0.080 2.946 2.960 rs_pw_transfer 158 9.4 0.002 0.003 1.804 2.624 density_rs2pw 16 7.0 0.002 0.002 1.740 2.269 x_to_yz 585 14.0 0.910 0.938 2.121 2.229 vdW_energy 15 10.0 2.126 2.226 2.126 2.226 mp_waitany 520 11.3 1.195 2.084 1.195 2.084 mp_alltoall_d11v 217 9.2 1.352 2.077 1.352 2.077 build_core_ppnl 1 5.0 1.847 2.049 1.847 2.049 yz_to_x 501 13.7 0.471 0.528 1.730 2.043 rs_pw_transfer_RS2PW_200 18 8.8 0.072 0.079 1.000 1.958 rs_gather_matrices 15 10.0 0.036 0.041 1.084 1.804 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=70.087, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=47.665, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.463, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.776, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.301, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.799, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.294, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=22.455, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=12.443, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.716, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.472, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=4.541, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=5.107, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.082 0.082 324.329 324.329 qs_energies 1 2.0 0.000 0.000 324.172 324.172 ls_scf 1 3.0 0.000 0.000 322.358 322.358 ls_scf_main 1 4.0 0.002 0.002 308.105 308.105 density_matrix_trs4 11 5.0 0.011 0.011 181.146 181.146 ls_scf_dm_to_ks 11 5.0 0.000 0.000 119.740 119.740 dbcsr_multiply_generic 185 6.1 0.635 0.635 115.565 115.565 matrix_ls_to_qs 11 6.0 0.000 0.000 115.348 115.348 multiply_cannon 185 7.1 3.418 3.418 79.523 79.523 dbcsr_copy_into_existing 11 7.0 61.602 61.602 61.602 61.602 dbcsr_complete_redistribute 23 7.5 43.167 43.167 58.980 58.980 multiply_cannon_loop 185 8.1 0.404 0.404 57.225 57.225 multiply_cannon_multrec 185 9.1 54.924 54.924 54.995 54.995 matrix_decluster 11 7.0 0.000 0.000 53.744 53.744 arnoldi_extremal 12 6.1 0.000 0.000 46.001 46.001 arnoldi_normal_ev 12 7.1 0.027 0.027 46.001 46.001 build_subspace 23 8.1 0.132 0.132 45.338 45.338 dbcsr_matrix_vector_mult 652 9.0 0.254 0.254 35.378 35.378 dbcsr_matrix_vector_mult_local 652 10.0 33.823 33.823 33.831 33.831 make_m2s 370 7.1 0.031 0.031 29.537 29.537 make_images 370 8.1 7.578 7.578 27.038 27.038 dbcsr_finalize 646 7.5 0.210 0.210 21.616 21.616 dbcsr_merge_all 597 8.5 3.764 3.764 19.526 19.526 setup_rec_index_2d 370 8.1 18.660 18.660 18.660 18.660 dbcsr_sort_indices 1103 9.9 15.095 15.095 15.095 15.095 tree_to_linear_d 110 9.4 13.468 13.468 13.468 13.468 ls_scf_init_scf 1 4.0 0.000 0.000 13.194 13.194 quick_finalize 395 10.0 0.493 0.493 12.869 12.869 ls_scf_init_matrix_S 1 5.0 0.000 0.000 12.721 12.721 dbcsr_special_finalize 370 9.1 0.003 0.003 11.877 11.877 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 11.810 11.810 dbcsr_dot_sd 144 6.3 9.288 9.288 9.289 9.289 dbcsr_frobenius_norm 142 6.1 7.999 7.999 8.002 8.002 matrix_qs_to_ls 12 5.1 0.000 0.000 7.516 7.516 matrix_cluster 12 6.1 0.000 0.000 7.515 7.515 make_images_data 370 9.1 0.011 0.011 7.427 7.427 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.011 96.740 96.741 qs_energies 1 2.0 0.000 0.000 96.635 96.635 ls_scf 1 3.0 0.000 0.000 96.554 96.555 ls_scf_main 1 4.0 0.000 0.003 92.684 92.685 density_matrix_trs4 11 5.0 0.009 0.013 88.862 88.934 dbcsr_multiply_generic 185 6.1 0.079 0.094 83.268 83.511 multiply_cannon 185 7.1 0.045 0.049 69.928 71.004 multiply_cannon_loop 185 8.1 0.239 0.253 65.941 68.020 multiply_cannon_multrec 1480 9.1 43.944 45.857 44.459 46.372 mp_waitall_1 11936 10.3 19.006 20.561 19.006 20.561 multiply_cannon_metrocomm3 1480 9.1 0.021 0.023 11.379 14.551 make_m2s 370 7.1 0.035 0.039 9.368 9.467 make_images 370 8.1 0.704 0.732 9.245 9.343 multiply_cannon_metrocomm1 1480 9.1 0.012 0.013 4.273 6.245 calculate_norms 2960 9.1 5.512 5.647 5.512 5.647 arnoldi_extremal 12 6.1 0.000 0.001 4.154 4.161 arnoldi_normal_ev 12 7.1 0.002 0.008 4.154 4.161 make_images_data 370 9.1 0.013 0.015 3.767 4.135 build_subspace 23 8.1 0.040 0.053 4.015 4.019 mp_sum_l 1039 5.9 2.605 3.725 2.605 3.725 dbcsr_matrix_vector_mult 652 9.0 0.019 0.083 3.297 3.441 hybrid_alltoall_any 393 9.9 0.334 1.682 3.142 3.383 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.298 3.372 dbcsr_complete_redistribute 23 7.5 1.912 2.012 2.964 3.078 matrix_ls_to_qs 11 6.0 0.000 0.000 2.923 3.051 ls_scf_init_scf 1 4.0 0.000 0.000 2.996 2.997 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.958 2.966 matrix_decluster 11 7.0 0.000 0.005 2.655 2.775 make_images_pack 370 9.1 2.570 2.763 2.575 2.767 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.715 2.717 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 1.688 2.662 dbcsr_matrix_vector_mult_local 652 10.0 2.496 2.641 2.500 2.646 buffer_matrices_ensure_size 370 8.1 2.352 2.485 2.352 2.485 dbcsr_add_d 280 6.0 0.002 0.002 2.120 2.199 dbcsr_add_anytype 280 7.0 1.155 1.226 2.118 2.197 dbcsr_finalize 646 7.5 0.015 0.015 1.999 2.127 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=112.15300000000002, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=61.602, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=54.924, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=43.167, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=33.823, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=18.66, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=18.695000000000007, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=43.944, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.912, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.496, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.512, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.605, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=19.006, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=2.57, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.005 0.005 103.896 103.896 lib_test 1 2.0 0.000 0.000 103.890 103.890 dbcsr_run_tests 3 3.0 0.003 0.003 103.890 103.890 test_multiplies_multiproc 3 4.0 0.001 0.001 83.719 83.719 dbcsr_redistribute 9 5.0 54.701 54.701 58.227 58.227 dbcsr_multiply_generic 9 5.0 0.001 0.001 23.490 23.490 dbcsr_make_random_matrix 9 4.0 14.632 14.632 20.075 20.075 multiply_cannon 9 6.0 0.002 0.002 16.726 16.726 multiply_cannon_loop 9 7.0 0.003 0.003 16.210 16.210 multiply_cannon_multrec 9 8.0 16.206 16.206 16.207 16.207 dbcsr_finalize 27 5.7 0.005 0.005 9.318 9.318 dbcsr_merge_all 18 6.5 3.305 3.305 8.553 8.553 tree_to_linear_d 9 7.0 3.295 3.295 3.295 3.295 mp_alltoall_d11v 27 6.0 3.185 3.185 3.185 3.185 dbcsr_data_release 975 7.6 2.583 2.583 2.583 2.583 make_m2s 18 6.0 0.001 0.001 2.281 2.281 make_images 18 7.0 0.710 0.710 2.208 2.208 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 27.550 27.551 lib_test 1 2.0 0.000 0.001 27.519 27.541 dbcsr_run_tests 3 3.0 0.000 0.001 27.517 27.539 test_multiplies_multiproc 3 4.0 0.001 0.001 26.350 26.441 dbcsr_multiply_generic 9 5.0 0.001 0.002 24.351 24.433 multiply_cannon 9 6.0 0.002 0.003 21.898 22.315 multiply_cannon_loop 9 7.0 0.004 0.004 21.419 21.867 multiply_cannon_multrec 72 8.0 18.044 18.752 18.045 18.753 mp_waitall_1 576 9.2 3.787 4.558 3.787 4.558 multiply_cannon_metrocomm1 72 8.0 0.002 0.002 2.979 3.712 mp_sum_l 310 2.7 0.610 1.230 0.610 1.230 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.605 1.225 dbcsr_make_random_matrix 9 4.0 0.898 0.955 1.121 1.159 make_m2s 18 6.0 0.001 0.001 0.985 1.066 make_images 18 7.0 0.026 0.028 0.981 1.063 dbcsr_finalize 27 5.7 0.001 0.001 0.909 1.002 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.384 0.911 dbcsr_merge_all 18 6.5 0.148 0.171 0.792 0.891 dbcsr_data_release 444 7.6 0.710 0.828 0.710 0.828 dbcsr_redistribute 9 5.0 0.418 0.473 0.721 0.766 dbcsr_destroy 111 5.9 0.009 0.059 0.604 0.723 make_images_data 18 8.0 0.001 0.001 0.477 0.592 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.173999999999992, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=54.701, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=16.206, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=14.632, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.305, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=3.295, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.583, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.9350000000000023, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.418, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=18.044, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.898, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.148, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.71, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.61, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.787, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.050 0.050 151.266 151.266 qs_mol_dyn_low 1 2.0 0.004 0.004 149.268 149.268 velocity_verlet 5 3.0 0.004 0.004 122.028 122.028 qmmm_el_coupling 6 3.8 0.000 0.000 71.465 71.465 qmmm_elec_with_gaussian 6 4.8 0.169 0.169 71.459 71.459 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 70.476 70.476 qmmm_elec_gaussian_low_G 6 6.8 69.226 69.226 69.226 69.226 qs_forces 6 3.8 0.001 0.001 58.445 58.445 qs_energies 6 4.8 0.000 0.000 52.010 52.010 scf_env_do_scf 6 5.8 0.000 0.000 48.039 48.039 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 40.336 40.336 rebuild_ks_matrix 45 8.4 0.000 0.000 39.838 39.838 qs_ks_build_kohn_sham_matrix 45 9.4 0.007 0.007 39.838 39.838 qs_ks_update_qs_env 45 7.8 0.000 0.000 34.138 34.138 pw_transfer 966 11.9 0.071 0.071 24.002 24.002 fft_wrap_pw1pw2 801 13.0 0.008 0.008 23.653 23.653 fft_wrap_pw1pw2_150 507 14.3 2.500 2.500 23.102 23.102 qs_vxc_create 45 10.4 0.001 0.001 21.604 21.604 xc_vxc_pw_create 45 11.4 4.420 4.420 21.604 21.604 qs_rho_update_rho 45 7.9 0.000 0.000 10.594 10.594 calculate_rho_elec 45 8.9 0.903 0.903 10.593 10.593 pw_scatter_s 429 15.4 10.544 10.544 10.544 10.544 fist_calc_energy_force 6 3.8 0.002 0.002 10.444 10.444 xc_rho_set_and_dset_create 45 12.4 0.252 0.252 9.909 9.909 force_nonbond 6 4.8 9.207 9.207 9.207 9.207 fft3d_s 802 15.0 9.189 9.189 9.199 9.199 qmmm_forces 6 3.8 0.001 0.001 8.102 8.102 init_scf_loop 6 6.8 0.000 0.000 7.696 7.696 qmmm_forces_with_gaussian 6 4.8 0.126 0.126 7.576 7.576 pw_integral_ab 2539 7.4 7.564 7.564 7.564 7.564 qs_ks_ddapc 45 10.4 0.001 0.001 6.676 6.676 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.482 6.482 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.712 5.712 qmmm_forces_gaussian_low_G 6 6.8 5.407 5.407 5.407 5.407 pw_poisson_solve 51 9.9 2.330 2.330 5.367 5.367 grid_collocate_task_list 45 9.9 4.898 4.898 4.898 4.898 density_rs2pw 45 9.9 0.003 0.003 4.791 4.791 sum_up_and_integrate 45 10.4 0.244 0.244 4.402 4.402 integrate_v_rspace 45 11.4 0.012 0.012 4.158 4.158 cp_ddapc_apply_CD 45 11.4 0.006 0.006 4.135 4.135 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.037 90.674 90.675 qs_mol_dyn_low 1 2.0 0.004 0.005 89.105 89.194 qs_forces 6 3.8 0.001 0.001 66.370 66.371 qs_energies 6 4.8 0.001 0.001 63.329 63.329 scf_env_do_scf 6 5.8 0.000 0.001 61.748 61.748 scf_env_do_scf_inner_loop 113 6.2 0.003 0.009 59.292 59.294 rebuild_ks_matrix 119 8.1 0.000 0.001 43.753 43.771 qs_ks_build_kohn_sham_matrix 119 9.1 0.022 0.022 43.753 43.771 qs_ks_update_qs_env 119 7.3 0.001 0.001 41.173 41.189 velocity_verlet 5 3.0 0.002 0.003 36.853 36.858 pw_transfer 2446 11.8 0.301 0.313 28.132 28.410 fft_wrap_pw1pw2 2059 12.8 0.034 0.036 27.266 27.597 fft_wrap_pw1pw2_150 1321 14.0 2.369 2.536 26.380 26.593 qs_vxc_create 119 10.1 0.004 0.004 22.168 22.173 xc_vxc_pw_create 119 11.1 0.481 0.657 22.164 22.169 fft3d_ps 2059 14.8 12.160 13.184 20.596 21.117 qs_rho_update_rho 119 7.3 0.001 0.001 17.080 17.082 calculate_rho_elec 119 8.3 0.087 0.097 17.079 17.081 sum_up_and_integrate 119 10.1 0.090 0.099 15.765 15.807 integrate_v_rspace 119 11.1 0.005 0.005 15.675 15.713 qmmm_forces 6 3.8 0.003 0.003 12.651 12.651 rs_pw_transfer 988 11.5 0.016 0.018 12.033 12.559 qmmm_forces_with_gaussian 6 4.8 0.428 0.521 12.285 12.498 density_rs2pw 119 9.3 0.011 0.012 10.441 10.904 xc_rho_set_and_dset_create 119 12.1 0.522 0.622 10.351 10.697 potential_pw2rs 119 12.1 0.011 0.013 9.465 9.484 qmmm_el_coupling 6 3.8 0.000 0.000 8.917 8.962 qmmm_elec_with_gaussian 6 4.8 0.375 0.485 8.913 8.958 mp_alltoall_z22v 2059 16.8 5.214 7.134 5.214 7.134 grid_collocate_task_list 119 9.3 6.322 6.710 6.322 6.710 grid_integrate_task_list 119 12.1 5.682 6.021 5.682 6.021 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.806 5.988 rs_pw_transfer_PW2RS_150 125 13.9 2.683 2.780 5.266 5.302 yz_to_x 964 15.3 1.202 1.372 3.839 5.102 qmmm_forces_gaussian_low_G 6 6.8 4.753 4.911 4.753 4.911 pw_restrict_s3 18 5.8 2.513 2.572 4.777 4.828 x_to_yz 1095 16.3 1.968 2.165 4.546 4.812 rs_pw_transfer_RS2PW_150 125 11.2 2.016 2.158 4.234 4.777 mp_waitany 4028 12.8 3.802 4.737 3.802 4.737 qs_scf_new_mos 113 7.2 0.001 0.001 4.046 4.058 qs_scf_loop_do_ot 113 8.2 0.001 0.001 4.045 4.057 ot_scf_mini 113 9.2 0.002 0.002 3.868 3.876 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.786 3.839 pw_prolongate_s3 18 6.8 1.989 2.035 3.786 3.839 dbcsr_multiply_generic 2588 12.3 0.101 0.115 3.606 3.670 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.374 3.462 qs_ks_ddapc 119 10.1 0.003 0.003 3.017 3.155 pw_integral_ab 2761 7.7 2.345 2.378 2.747 2.956 mp_sum_dm3 33 5.7 2.441 2.690 2.441 2.690 pw_gather_p 964 14.3 2.220 2.599 2.220 2.599 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.593 2.594 qmmm_elec_gaussian_low_G 6 6.8 2.442 2.509 2.442 2.509 ot_mini 113 10.2 0.001 0.001 2.449 2.460 init_scf_loop 6 6.8 0.000 0.000 2.451 2.452 mp_waitall_1 188862 16.2 2.210 2.388 2.210 2.388 pw_scatter_p 1095 15.3 1.989 2.079 1.989 2.079 pw_derive 732 12.5 1.734 1.949 1.734 1.949 qs_ot_get_derivative 113 11.2 0.001 0.001 1.936 1.943 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=35.23100000000001, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=69.226, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.544, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=9.207, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.189, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=7.564, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.407, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.898, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=51.756, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.442, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=2.345, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.753, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.322, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.682, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.214, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=12.16, yerr=0.0 Summary: Performance test took 56 minutes. Status: OK Uploading artifacts... done EndDate: 2022-02-14 12:20:51+00:00