StartDate: 2024-04-24 08:04:31+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: b279b6b5e665c770a08400e4d41408713a49bccd CommitTime: 2024-04-23 15:49:24 +0200 CommitAuthor: Matthias Krack CommitSubject: Intel oneapi-hpckit 2023.2.1 -> 2024.0.1 #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=b279b6b5e665c770a08400e4d41408713a49bccd Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 394.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 3c645031de29: Pulling fs layer 3c645031de29: Download complete 3c645031de29: Pull complete Digest: sha256:1b8d8ff4777f36f19bfe73ee4df61e3a0b789caeff29caa019539ec7c9a57f95 Status: Downloaded newer image for ubuntu:22.04 ---> 7af9ba4f0a47 Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 9dd3c38d749f Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 85614924da4a Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 7aff2b7aa738 Step 5/42 : RUN mkdir scripts ---> Using cache ---> 396b1dbd6ede Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 647732b64fd5 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 55700dee0a5a Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> 53d4008825ec Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> b7ed63d2aec8 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> ba960a28d000 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 5e0dd371d8da Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> b6411bdd5988 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> a337dd578665 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> a805edf5d445 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 89c3fab60b10 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 11ede4addb4c Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 28d01e379245 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 432ae994d46f Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> aa3695fb1a8a Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 2385437ba30d Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 99c9de45acee Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> f51168b70480 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 0d490d89a462 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> bf61ef83c357 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> f54ba4d1feaf Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 65095d18cd26 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 3909634d023e Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 3050171b81d5 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> b59dffc80ca7 Step 30/42 : COPY ./Makefile . ---> Using cache ---> 5dca7b701d23 Step 31/42 : COPY ./src ./src ---> Using cache ---> a8f7df1f1aae Step 32/42 : COPY ./exts ./exts ---> Using cache ---> e132b4ebebb6 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> b89f95895b4c Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/" ---> Running in 9a6b9ff0bd5f './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Removing intermediate container 9a6b9ff0bd5f ---> bcf742595733 Step 35/42 : COPY ./data ./data ---> 4f4f65f42c97 Step 36/42 : COPY ./tests ./tests ---> bb0ac6cb67a0 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 633189e19af3 Step 38/42 : COPY ./benchmarks ./benchmarks ---> b6c2d78e9a6e Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 0c1dc93bd5c5 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in e38613649f3a ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 75 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 86.093 86.093 qs_mol_dyn_low 1 2.0 0.003 0.003 85.529 85.529 qs_forces 11 3.9 0.001 0.001 85.488 85.488 qs_energies 11 4.9 0.001 0.001 79.402 79.402 scf_env_do_scf 11 5.9 0.001 0.001 68.698 68.698 scf_env_do_scf_inner_loop 108 6.5 0.013 0.013 55.203 55.203 velocity_verlet 10 3.0 0.002 0.002 55.154 55.154 qs_scf_new_mos 108 7.5 0.001 0.001 20.637 20.637 qs_scf_loop_do_ot 108 8.5 0.001 0.001 20.636 20.636 rebuild_ks_matrix 119 8.3 0.001 0.001 20.502 20.502 qs_ks_build_kohn_sham_matrix 119 9.3 0.012 0.012 20.501 20.501 dbcsr_multiply_generic 2286 12.5 0.154 0.154 20.335 20.335 qs_rho_update_rho_low 119 7.7 0.001 0.001 19.589 19.589 calculate_rho_elec 119 8.7 0.952 0.952 19.588 19.588 ot_scf_mini 108 9.5 0.002 0.002 19.360 19.360 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.721 18.721 grid_collocate_task_list 119 9.7 14.714 14.714 14.714 14.714 sum_up_and_integrate 119 10.3 0.001 0.001 13.280 13.280 init_scf_loop 11 6.9 0.000 0.000 13.257 13.257 integrate_v_rspace 119 11.3 0.107 0.107 13.246 13.246 ot_mini 108 10.5 0.001 0.001 12.339 12.339 make_m2s 4572 13.5 0.045 0.045 10.941 10.941 grid_integrate_task_list 119 12.3 10.846 10.846 10.846 10.846 prepare_preconditioner 11 7.9 0.000 0.000 10.808 10.808 make_preconditioner 11 8.9 0.000 0.000 10.808 10.808 make_full_inverse_cholesky 11 9.9 0.022 0.022 9.602 9.602 fft_wrap_pw1pw2 1201 11.6 0.009 0.009 6.959 6.959 qs_ot_get_derivative 108 11.5 0.001 0.001 6.730 6.730 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.480 6.480 fft_wrap_pw1pw2_140 487 12.2 1.172 1.172 6.021 6.021 dbcsr_make_dense_low 5837 15.5 0.066 0.066 5.793 5.793 make_dense_data 5837 16.5 5.238 5.238 5.713 5.713 make_images 4572 14.5 2.290 2.290 5.628 5.628 ot_diis_step 108 11.5 0.003 0.003 5.606 5.606 multiply_cannon 2286 13.5 0.171 0.171 5.516 5.516 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.162 5.162 apply_single 119 13.6 0.000 0.000 5.162 5.162 multiply_cannon_loop 2286 14.5 0.054 0.054 5.069 5.069 dbcsr_make_images_dense 3978 14.8 0.017 0.017 5.015 5.015 multiply_cannon_multrec 2286 15.5 4.969 4.969 5.014 5.014 density_rs2pw 119 9.7 0.003 0.003 3.921 3.921 init_scf_run 11 5.9 0.003 0.003 3.666 3.666 scf_env_initial_rho_setup 11 6.9 0.002 0.002 3.663 3.663 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.605 3.605 qs_ot_get_p 119 10.4 0.001 0.001 3.485 3.485 dbcsr_copy 2102 12.0 0.198 0.198 3.476 3.476 dbcsr_complete_redistribute 329 12.2 1.733 1.733 3.419 3.419 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.356 3.356 dbcsr_copy_into_existing 22 7.9 3.238 3.238 3.238 3.238 qs_create_task_list 11 7.9 0.000 0.000 3.193 3.193 generate_qs_task_list 11 8.9 1.952 1.952 3.193 3.193 wfi_extrapolate 11 7.9 0.001 0.001 3.156 3.156 copy_dbcsr_to_fm 153 11.3 0.002 0.002 2.894 2.894 fft3d_s 1202 13.6 2.864 2.864 2.869 2.869 cp_fm_cholesky_invert 11 10.9 2.794 2.794 2.794 2.794 cp_fm_cholesky_decompose 22 10.9 2.781 2.781 2.781 2.781 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.728 2.728 build_core_hamiltonian_matrix 11 6.9 0.000 0.000 2.465 2.465 potential_pw2rs 119 12.3 0.046 0.046 2.293 2.293 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.219 2.219 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.121 2.121 qs_ot_p2m_diag 50 11.0 0.154 0.154 2.120 2.120 pw_poisson_solve 119 10.3 0.002 0.002 2.117 2.117 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 2.076 2.076 dbcsr_data_release 279534 16.0 2.009 2.009 2.009 2.009 copy_fm_to_dbcsr 176 11.2 0.001 0.001 1.845 1.845 pw_scatter_s 595 14.2 1.739 1.739 1.739 1.739 cp_dbcsr_syevd 50 12.0 0.002 0.002 1.728 1.728 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.034 47.829 47.847 qs_mol_dyn_low 1 2.0 0.004 0.005 47.679 47.684 qs_forces 11 3.9 0.001 0.002 47.631 47.632 qs_energies 11 4.9 0.001 0.001 44.636 44.639 scf_env_do_scf 11 5.9 0.000 0.002 40.397 40.398 scf_env_do_scf_inner_loop 108 6.5 0.003 0.020 37.259 37.259 velocity_verlet 10 3.0 0.001 0.003 28.309 28.310 rebuild_ks_matrix 119 8.3 0.000 0.001 16.974 17.018 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.016 16.974 17.017 dbcsr_multiply_generic 2286 12.5 0.072 0.076 15.097 15.183 qs_ks_update_qs_env 119 7.6 0.001 0.001 15.086 15.121 qs_rho_update_rho_low 119 7.7 0.001 0.001 13.075 13.085 calculate_rho_elec 119 8.7 0.030 0.031 13.075 13.085 sum_up_and_integrate 119 10.3 0.002 0.002 12.371 12.393 integrate_v_rspace 119 11.3 0.004 0.004 12.349 12.371 qs_scf_new_mos 108 7.5 0.001 0.001 12.113 12.171 qs_scf_loop_do_ot 108 8.5 0.001 0.001 12.112 12.171 ot_scf_mini 108 9.5 0.002 0.002 11.455 11.516 multiply_cannon 2286 13.5 0.137 0.146 11.062 11.356 multiply_cannon_loop 2286 14.5 0.105 0.113 10.472 10.619 grid_collocate_task_list 119 9.7 8.796 9.121 8.796 9.121 mp_waitall_1 158411 16.6 8.550 8.862 8.550 8.862 grid_integrate_task_list 119 12.3 8.477 8.631 8.477 8.631 multiply_cannon_metrocomm3 18288 15.5 0.051 0.054 6.667 6.907 ot_mini 108 10.5 0.001 0.001 6.569 6.635 density_rs2pw 119 9.7 0.005 0.006 3.870 4.166 multiply_cannon_multrec 18288 15.5 3.385 3.566 3.396 3.578 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.360 3.409 apply_single 119 13.6 0.000 0.000 3.359 3.409 potential_pw2rs 119 12.3 0.006 0.008 3.376 3.386 qs_ot_get_derivative 108 11.5 0.001 0.001 3.319 3.383 fft_wrap_pw1pw2 1201 11.6 0.016 0.018 3.210 3.247 ot_diis_step 108 11.5 0.003 0.003 3.233 3.233 init_scf_run 11 5.9 0.000 0.006 3.182 3.183 scf_env_initial_rho_setup 11 6.9 0.000 0.004 3.182 3.182 init_scf_loop 11 6.9 0.000 0.000 3.122 3.122 wfi_extrapolate 11 7.9 0.001 0.001 2.961 2.961 fft_wrap_pw1pw2_140 487 12.2 0.134 0.145 2.722 2.785 make_m2s 4572 13.5 0.047 0.055 2.645 2.700 transfer_rs2pw 487 10.6 0.006 0.007 2.349 2.652 transfer_pw2rs 487 13.2 0.006 0.006 2.524 2.529 make_images 4572 14.5 0.116 0.121 2.312 2.375 fft3d_ps 1201 13.6 1.183 1.260 2.316 2.364 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.021 2.030 mp_waitany 9880 13.7 1.644 1.973 1.644 1.973 qs_ot_get_p 119 10.4 0.001 0.001 1.512 1.567 transfer_rs2pw_140 130 11.5 0.193 0.227 1.205 1.514 make_images_data 4572 15.5 0.039 0.045 1.263 1.359 transfer_pw2rs_140 130 13.9 0.397 0.431 1.279 1.296 prepare_preconditioner 11 7.9 0.000 0.000 1.249 1.263 make_preconditioner 11 8.9 0.000 0.000 1.249 1.263 hybrid_alltoall_any 4725 16.4 0.069 0.183 1.096 1.181 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.137 1.152 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.044 1.077 mp_alltoall_d11v 2130 13.8 0.793 1.073 0.793 1.073 mp_sum_l 11298 13.2 0.873 1.024 0.873 1.024 mp_alltoall_z22v 1201 15.6 0.881 0.993 0.881 0.993 transfer_pw2rs_50 119 14.3 0.360 0.375 0.971 0.993 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.983 0.985 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.953 0.984 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=47.088, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=14.714, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.846, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.238, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.969, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=3.238, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=16.977, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=8.796, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.477, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.385, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.55, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.644, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.031 0.031 111.866 111.866 qs_mol_dyn_low 1 2.0 0.003 0.003 111.215 111.215 qs_forces 11 3.9 0.001 0.001 111.172 111.172 qs_energies 11 4.9 0.001 0.001 103.283 103.283 scf_env_do_scf 11 5.9 0.001 0.001 90.374 90.374 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 74.904 74.904 velocity_verlet 10 3.0 0.002 0.002 70.788 70.788 rebuild_ks_matrix 107 8.3 0.001 0.001 33.979 33.979 qs_ks_build_kohn_sham_matrix 107 9.3 0.012 0.012 33.978 33.978 qs_rho_update_rho_low 107 7.7 0.001 0.001 32.206 32.206 calculate_rho_elec 107 8.7 0.862 0.862 32.205 32.205 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.536 30.536 grid_collocate_task_list 107 9.7 27.639 27.639 27.639 27.639 sum_up_and_integrate 107 10.3 0.001 0.001 27.059 27.059 integrate_v_rspace 107 11.3 0.111 0.111 26.981 26.981 grid_integrate_task_list 107 12.3 24.511 24.511 24.511 24.511 dbcsr_multiply_generic 1966 12.4 0.152 0.152 19.266 19.266 qs_scf_new_mos 96 7.5 0.001 0.001 19.128 19.128 qs_scf_loop_do_ot 96 8.5 0.001 0.001 19.127 19.127 ot_scf_mini 96 9.5 0.002 0.002 17.831 17.831 init_scf_loop 11 6.9 0.000 0.000 15.244 15.244 ot_mini 96 10.5 0.001 0.001 11.431 11.431 prepare_preconditioner 11 7.9 0.000 0.000 11.121 11.121 make_preconditioner 11 8.9 0.000 0.000 11.121 11.121 make_m2s 3932 13.4 0.040 0.040 10.195 10.195 make_full_inverse_cholesky 11 9.9 0.024 0.024 9.991 9.991 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.010 7.010 fft_wrap_pw1pw2 1081 11.6 0.008 0.008 6.726 6.726 qs_ot_get_derivative 96 11.5 0.001 0.001 6.134 6.134 fft_wrap_pw1pw2_140 439 12.2 1.051 1.051 5.814 5.814 multiply_cannon 1966 13.4 0.157 0.157 5.455 5.455 dbcsr_make_dense_low 4961 15.5 0.070 0.070 5.395 5.395 make_dense_data 4961 16.5 4.901 4.901 5.313 5.313 init_scf_run 11 5.9 0.003 0.003 5.297 5.297 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.294 5.294 ot_diis_step 96 11.5 0.003 0.003 5.294 5.294 make_images 3932 14.4 2.174 2.174 5.277 5.277 multiply_cannon_loop 1966 14.4 0.038 0.038 5.045 5.045 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.007 5.007 apply_single 107 13.6 0.000 0.000 5.007 5.007 multiply_cannon_multrec 1966 15.4 4.965 4.965 5.006 5.006 wfi_extrapolate 11 7.9 0.001 0.001 4.677 4.677 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.659 4.659 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.318 4.318 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.048 4.048 density_rs2pw 107 9.7 0.003 0.003 3.704 3.704 qs_create_task_list 11 7.9 0.000 0.000 3.652 3.652 generate_qs_task_list 11 8.9 2.384 2.384 3.652 3.652 dbcsr_copy 1855 11.9 0.209 0.209 3.612 3.612 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.570 3.570 dbcsr_complete_redistribute 317 12.2 1.744 1.744 3.432 3.432 dbcsr_copy_into_existing 22 7.9 3.362 3.362 3.362 3.362 qs_ot_get_p 107 10.4 0.001 0.001 3.163 3.163 copy_dbcsr_to_fm 147 11.2 0.002 0.002 2.963 2.963 cp_fm_cholesky_invert 11 10.9 2.919 2.919 2.919 2.919 cp_fm_cholesky_decompose 22 10.9 2.891 2.891 2.891 2.891 fft3d_s 1082 13.6 2.884 2.884 2.889 2.889 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.533 2.533 potential_pw2rs 107 12.3 0.043 0.043 2.359 2.359 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.271 2.271 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.036 73.198 73.217 qs_mol_dyn_low 1 2.0 0.003 0.004 73.042 73.048 qs_forces 11 3.9 0.002 0.002 72.997 72.997 qs_energies 11 4.9 0.001 0.001 68.181 68.184 scf_env_do_scf 11 5.9 0.000 0.002 63.176 63.177 scf_env_do_scf_inner_loop 96 6.5 0.003 0.021 58.174 58.175 velocity_verlet 10 3.0 0.001 0.003 44.148 44.149 rebuild_ks_matrix 107 8.3 0.000 0.001 31.739 31.828 qs_ks_build_kohn_sham_matrix 107 9.3 0.014 0.019 31.739 31.827 qs_ks_update_qs_env 107 7.6 0.001 0.001 28.024 28.104 sum_up_and_integrate 107 10.3 0.002 0.002 27.445 27.488 integrate_v_rspace 107 11.3 0.004 0.004 27.424 27.472 qs_rho_update_rho_low 107 7.7 0.001 0.001 25.353 25.360 calculate_rho_elec 107 8.7 0.027 0.028 25.353 25.359 grid_integrate_task_list 107 12.3 22.611 23.457 22.611 23.457 grid_collocate_task_list 107 9.7 21.637 22.108 21.637 22.108 dbcsr_multiply_generic 1966 12.4 0.066 0.077 13.404 13.872 qs_scf_new_mos 96 7.5 0.001 0.001 10.703 10.775 qs_scf_loop_do_ot 96 8.5 0.001 0.001 10.702 10.774 multiply_cannon 1966 13.4 0.123 0.145 10.031 10.324 ot_scf_mini 96 9.5 0.002 0.003 10.138 10.219 multiply_cannon_loop 1966 14.4 0.089 0.107 9.490 9.826 mp_waitall_1 136719 16.5 7.775 8.497 7.775 8.497 multiply_cannon_metrocomm3 15728 15.4 0.041 0.050 6.005 6.596 ot_mini 96 10.5 0.001 0.001 6.054 6.136 init_scf_loop 11 6.9 0.000 0.000 4.984 4.985 density_rs2pw 107 9.7 0.004 0.005 3.338 3.952 init_scf_run 11 5.9 0.000 0.006 3.913 3.913 scf_env_initial_rho_setup 11 6.9 0.000 0.004 3.913 3.913 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.840 3.849 wfi_extrapolate 11 7.9 0.001 0.001 3.564 3.564 multiply_cannon_multrec 15728 15.4 3.091 3.482 3.101 3.492 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.160 3.290 apply_single 107 13.6 0.000 0.001 3.160 3.290 potential_pw2rs 107 12.3 0.005 0.006 3.189 3.216 fft_wrap_pw1pw2 1081 11.6 0.013 0.016 3.030 3.102 qs_ot_get_derivative 96 11.5 0.001 0.001 3.014 3.098 ot_diis_step 96 11.5 0.003 0.003 3.018 3.018 mp_alltoall_d11v 1998 13.7 1.905 2.630 1.905 2.630 fft_wrap_pw1pw2_140 439 12.2 0.114 0.133 2.537 2.595 transfer_rs2pw 439 10.6 0.006 0.007 1.931 2.529 transfer_pw2rs 439 13.2 0.005 0.007 2.403 2.410 make_m2s 3932 13.4 0.040 0.044 2.336 2.409 fft3d_ps 1081 13.6 1.050 1.195 2.224 2.364 rs_gather_matrices 107 12.3 0.061 0.070 1.581 2.269 mp_waitany 8968 13.7 1.632 2.168 1.632 2.168 make_images 3932 14.4 0.103 0.110 2.046 2.127 transfer_rs2pw_140 118 11.5 0.158 0.182 1.103 1.704 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=46.488000000000014, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=27.639, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.511, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.965, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.901, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=3.362, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=16.178999999999988, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.637, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.611, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.091, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.775, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=1.905, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.225 0.225 137.328 137.328 qs_energies 1 2.0 0.000 0.000 136.485 136.485 scf_env_do_scf 1 3.0 0.000 0.000 135.286 135.286 qs_ks_update_qs_env 8 5.0 0.000 0.000 130.389 130.389 rebuild_ks_matrix 7 6.0 0.000 0.000 130.334 130.334 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 130.334 130.334 hfx_ks_matrix 7 8.0 0.000 0.000 121.610 121.610 integrate_four_center 7 9.0 1.533 1.533 121.568 121.568 integrate_four_center_main 7 10.0 0.270 0.270 107.632 107.632 integrate_four_center_bin 449 11.0 107.362 107.362 107.362 107.362 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 77.522 77.522 init_scf_loop 1 4.0 0.000 0.000 57.753 57.753 integrate_four_center_load 7 10.0 0.001 0.001 12.119 12.119 hfx_load_balance 1 11.0 0.005 0.005 12.119 12.119 hfx_load_balance_count 1 12.0 6.072 6.072 6.072 6.072 hfx_load_balance_bin 1 12.0 6.025 6.025 6.025 6.025 qs_vxc_create 14 8.0 0.000 0.000 3.351 3.351 xc_vxc_pw_create 14 9.0 0.117 0.117 3.351 3.351 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.218 0.246 134.593 134.605 qs_energies 1 2.0 0.000 0.000 134.181 134.192 scf_env_do_scf 1 3.0 0.000 0.000 133.820 133.821 qs_ks_update_qs_env 8 5.0 0.000 0.000 131.440 131.441 rebuild_ks_matrix 7 6.0 0.000 0.000 131.431 131.432 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 131.431 131.432 hfx_ks_matrix 7 8.0 0.000 0.000 125.674 125.676 integrate_four_center 7 9.0 0.056 0.343 125.664 125.666 integrate_four_center_main 7 10.0 0.003 0.004 104.771 112.446 integrate_four_center_bin 448 11.0 104.768 112.442 104.768 112.442 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 79.046 79.047 init_scf_loop 1 4.0 0.000 0.000 54.772 54.772 mp_sync 56 11.2 7.727 12.941 7.727 12.941 integrate_four_center_load 7 10.0 0.000 0.000 12.506 12.518 hfx_load_balance 1 11.0 0.001 0.001 12.506 12.518 hfx_load_balance_bin 1 12.0 5.965 6.568 5.965 6.568 hfx_load_balance_count 1 12.0 5.915 6.051 5.915 6.051 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=15.841000000000008, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=107.362, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=6.072, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=6.025, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.533, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.27, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.225, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.940999999999974, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=104.768, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.915, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.965, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.056, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.003, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.218, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=7.727, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 79.469 79.469 qs_energies 1 2.0 0.000 0.000 79.092 79.092 mp2_main 1 3.0 0.000 0.000 75.887 75.887 mp2_gpw_main 1 4.0 0.000 0.000 75.776 75.776 rpa_ri_compute_en 1 5.0 0.000 0.000 72.218 72.218 rpa_num_int 1 6.0 0.001 0.001 72.212 72.212 compute_mat_P_omega 1 7.0 0.004 0.004 63.096 63.096 compute_mat_P_omega_contract 10 8.0 8.908 8.908 62.895 62.895 dbt_total 2336 9.6 0.015 0.015 49.054 49.054 dbt_contract 787 11.0 0.043 0.043 41.778 41.778 dbt_tas_total 1149 12.2 0.206 0.206 40.146 40.146 dbt_tas_multiply 807 12.1 0.002 0.002 38.915 38.915 dbt_tas_dbm 807 14.1 0.003 0.003 32.392 32.392 dbm_multiply 807 16.1 32.383 32.383 32.383 32.383 dbt_tas_mm_1N 524 15.1 0.001 0.001 25.053 25.053 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 23.824 23.824 compute_mat_P_omega_calc_M_occ 250 9.0 8.886 8.886 17.334 17.334 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 6.873 6.873 dbt_copy 1103 10.7 0.125 0.125 6.024 6.024 dbt_tas_mm_2 251 15.0 0.001 0.001 5.861 5.861 compute_QP_energies 1 7.0 0.000 0.000 4.980 4.980 compute_self_energy_cubic_gw 1 8.0 0.053 0.053 4.978 4.978 dbt_tas_reserve_blocks_index 3261 14.3 0.483 0.483 4.001 4.001 contract_cubic_gw 21 9.0 0.000 0.000 3.985 3.985 dbm_reserve_blocks 3628 15.3 3.595 3.595 3.595 3.595 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.551 3.551 scf_env_do_scf 1 3.0 0.000 0.000 3.090 3.090 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 3.090 3.090 dbt_reserve_blocks_index 2280 13.1 0.058 0.058 3.039 3.039 dbt_reserve_blocks_index_array 2222 12.2 0.010 0.010 2.982 2.982 dbt_crop 1042 12.0 1.577 1.577 2.743 2.743 dbt_tas_copy 574 11.4 1.368 1.368 2.411 2.411 convert_to_new_pgrid 2421 14.1 0.066 0.066 2.366 2.366 dbm_copy 1614 15.1 2.300 2.300 2.300 2.300 dbt_reshape 278 11.9 1.115 1.115 2.026 2.026 dbt_tas_reshape 367 15.0 0.026 0.026 1.992 1.992 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.669 1.669 compute_W_cubic_GW 10 7.0 0.009 0.009 1.649 1.649 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 1.626 1.626 get_2c_integrals 1 6.0 0.000 0.000 1.600 1.600 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.026 36.462 36.476 qs_energies 1 2.0 0.000 0.000 36.320 36.322 mp2_main 1 3.0 0.000 0.000 35.091 35.093 mp2_gpw_main 1 4.0 0.000 0.000 35.002 35.004 rpa_ri_compute_en 1 5.0 0.000 0.000 33.649 33.650 rpa_num_int 1 6.0 0.000 0.002 33.648 33.650 dbt_total 2336 9.6 0.016 0.017 29.720 29.727 compute_mat_P_omega 1 7.0 0.001 0.006 28.263 28.275 compute_mat_P_omega_contract 10 8.0 0.417 0.432 27.999 28.003 dbt_contract 787 11.0 0.041 0.042 21.988 21.992 dbt_tas_total 1149 12.2 0.082 0.091 19.496 19.500 dbt_tas_multiply 807 12.1 0.002 0.002 19.405 19.408 dbt_tas_dbm 807 14.1 0.003 0.003 14.136 14.149 dbm_multiply 807 16.1 11.162 12.061 11.162 12.061 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.287 8.288 compute_mat_P_omega_calc_M_occ 250 9.0 0.403 0.417 8.174 8.175 dbt_copy 1149 10.8 0.014 0.015 6.652 7.013 mp_sync 8688 11.6 5.477 6.731 5.477 6.731 dbt_reshape 1136 11.8 2.755 2.944 6.324 6.687 dbt_tas_mm_2 251 15.0 0.001 0.002 6.647 6.650 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.829 5.830 dbt_tas_mm_1N 524 15.1 0.001 0.002 5.008 5.455 compute_QP_energies 1 7.0 0.000 0.000 3.539 3.540 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 3.538 3.539 mp_waitall_2 3812 15.3 2.667 2.976 2.667 2.976 dbt_communicate_buffer 1136 12.8 0.051 0.055 2.642 2.873 contract_cubic_gw 21 9.0 0.000 0.000 2.755 2.755 dbt_reserve_blocks_index 2887 13.1 0.074 0.081 1.874 2.137 dbt_reserve_blocks_index_array 2829 12.2 0.008 0.010 1.863 2.123 dbt_tas_reserve_blocks_index 3347 14.5 0.433 0.466 1.851 2.112 dbm_reserve_blocks 3752 15.4 1.517 1.768 1.517 1.768 dbt_crop 1042 12.0 0.928 1.027 1.515 1.723 mp2_ri_gpw_compute_in 1 5.0 0.003 0.004 1.336 1.336 dbt_tas_replicate 405 14.1 0.547 0.720 1.165 1.244 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.168 1.171 scf_env_do_scf 1 3.0 0.000 0.000 1.169 1.169 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 1.169 1.169 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.100 1.103 parallel_gemm_fm 105 8.4 0.000 0.000 1.086 1.101 parallel_gemm_fm_cosma 105 9.4 1.086 1.100 1.086 1.100 convert_to_new_pgrid 2421 14.1 0.026 0.032 0.858 0.998 dbm_copy 1608 15.1 0.826 0.965 0.826 0.965 mp_max_i 2009 9.8 0.703 0.933 0.703 0.933 compute_W_cubic_GW 10 7.0 0.001 0.001 0.825 0.838 mp_sum_l 6165 12.9 0.634 0.775 0.634 0.775 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=22.28199999999999, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=32.383, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.908, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.886, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.595, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.3, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.115, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=11.238000000000003, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=11.162, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.417, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.403, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.517, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.826, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.755, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.667, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.477, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.021 0.021 240.154 240.154 qs_forces 1 2.0 0.000 0.000 239.628 239.628 rebuild_ks_matrix 7 6.6 0.000 0.000 238.125 238.125 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 238.125 238.125 hfx_ks_matrix 7 8.6 0.000 0.000 236.114 236.114 hfx_ri_update_ks 7 9.6 0.000 0.000 195.158 195.158 hfx_ri_update_ks_Pmat 7 10.6 31.541 31.541 195.155 195.155 dbt_total 849 11.0 0.006 0.006 182.120 182.120 qs_energies 1 3.0 0.000 0.000 172.866 172.866 scf_env_do_scf 1 4.0 0.000 0.000 172.548 172.548 qs_ks_update_qs_env 8 6.0 0.000 0.000 171.412 171.412 dbt_contract 207 12.4 0.057 0.057 162.388 162.388 dbt_tas_total 369 13.4 1.597 1.597 161.381 161.381 dbt_tas_multiply 216 13.5 0.001 0.001 157.311 157.311 dbt_tas_dbm 216 15.5 0.001 0.001 144.856 144.856 dbm_multiply 216 17.5 144.854 144.854 144.854 144.854 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 139.241 139.241 dbt_tas_mm_2 91 16.5 0.001 0.001 131.482 131.482 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 107.776 107.776 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 66.716 66.716 init_scf_loop 2 5.0 0.000 0.000 64.770 64.770 hfx_ri_update_forces 1 7.0 1.560 1.560 40.953 40.953 hfx_ri_forces_Pmat_3c 1 8.0 4.645 4.645 23.983 23.983 dbt_copy 423 11.8 0.074 0.074 15.654 15.654 precalc_derivatives 1 8.0 2.262 2.262 12.962 12.962 dbt_reshape 132 13.2 6.728 6.728 10.831 10.831 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 9.385 9.385 dbt_tas_mm_3T 77 17.1 0.000 0.000 9.125 9.125 dbt_tas_reserve_blocks_index 1323 15.4 0.995 0.995 7.696 7.696 build_3c_derivatives 3 9.0 2.445 2.445 7.031 7.031 dbm_reserve_blocks 1491 16.2 6.895 6.895 6.895 6.895 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 6.801 6.801 dbt_reserve_blocks_index 846 14.4 0.095 0.095 5.959 5.959 dbt_reserve_blocks_index_array 816 13.5 0.007 0.007 5.862 5.862 dbt_tas_reshape 168 14.5 0.003 0.003 4.924 4.924 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.039 50.721 50.733 qs_forces 1 2.0 0.000 0.000 50.522 50.522 rebuild_ks_matrix 7 6.6 0.000 0.000 49.741 49.741 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 49.741 49.741 hfx_ks_matrix 7 8.6 0.000 0.000 48.719 48.725 dbt_total 849 11.0 0.006 0.007 43.471 43.520 dbt_contract 207 12.4 0.026 0.028 33.473 33.488 dbt_tas_total 369 13.4 0.055 0.141 29.364 29.377 dbt_tas_multiply 216 13.5 0.001 0.001 28.882 28.883 hfx_ri_update_ks 7 9.6 0.000 0.000 28.724 28.724 hfx_ri_update_ks_Pmat 7 10.6 1.272 1.331 28.720 28.721 qs_energies 1 3.0 0.000 0.000 27.065 27.065 scf_env_do_scf 1 4.0 0.000 0.001 26.907 26.907 qs_ks_update_qs_env 8 6.0 0.000 0.000 26.295 26.295 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 23.447 23.448 dbt_tas_dbm 216 15.5 0.001 0.001 22.135 22.147 dbm_multiply 216 17.5 19.759 20.682 19.759 20.682 hfx_ri_update_forces 1 7.0 0.059 0.063 19.995 20.001 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 14.958 14.959 hfx_ri_forces_Pmat_3c 1 8.0 0.166 0.196 14.895 14.924 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 12.696 12.708 init_scf_loop 2 5.0 0.000 0.000 11.947 11.947 dbt_tas_mm_2 91 16.5 0.001 0.001 9.928 9.941 dbt_copy 539 12.5 0.012 0.014 8.868 9.284 mp_sync 2901 12.8 5.410 7.131 5.410 7.131 dbt_reshape 393 13.9 3.550 3.687 6.799 7.107 dbt_tas_mm_3T 77 17.1 0.000 0.000 5.778 6.411 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 5.154 5.155 dbt_tas_mm_3N 37 15.4 0.000 0.000 4.565 4.646 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 4.030 4.030 precalc_derivatives 1 8.0 0.088 0.094 3.836 3.836 dbt_tas_reserve_blocks_index 1471 15.8 0.890 0.917 3.218 3.595 mp_waitall_2 1318 16.2 3.072 3.235 3.072 3.235 dbt_reserve_blocks_index 1107 14.8 0.113 0.121 2.650 2.950 dbm_reserve_blocks 1641 16.6 2.542 2.924 2.542 2.924 dbt_reserve_blocks_index_array 1077 13.9 0.005 0.006 2.621 2.921 dbt_crop 372 13.7 1.776 1.831 2.558 2.703 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 2.313 2.319 dbt_communicate_buffer 393 14.9 0.012 0.013 2.136 2.292 build_3c_derivatives 3 9.0 0.222 0.241 2.161 2.169 dbt_tas_replicate 170 15.1 0.665 0.695 1.748 1.829 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.705 1.712 convert_to_new_pgrid 648 15.5 0.039 0.101 1.380 1.631 dbm_copy 452 16.3 1.197 1.455 1.197 1.455 dbt_tas_copy 146 12.6 0.687 0.730 1.314 1.449 mp_sum_l 6385 13.7 1.022 1.327 1.022 1.327 dbt_tas_communicate_buffer 370 16.3 0.012 0.012 1.062 1.121 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=45.49099999999996, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=144.854, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=31.541, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=6.895, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=6.728, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.645, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=14.949999999999996, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=19.759, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.272, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.542, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.55, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.166, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.072, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=5.41, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.019 0.019 213.627 213.627 qs_energies 1 2.0 0.000 0.000 213.358 213.358 mp2_main 1 3.0 0.000 0.000 208.837 208.837 mp2_gpw_main 1 4.0 0.002 0.002 208.479 208.479 mp2_ri_gpw_compute_in 1 5.0 0.385 0.385 157.390 157.390 mp2_ri_gpw_compute_in_loop 1 6.0 0.010 0.010 148.186 148.186 mp2_eri_3c_integrate_gpw 2656 7.0 0.014 0.014 120.616 120.616 integrate_v_rspace 2666 8.0 0.625 0.625 107.509 107.509 grid_integrate_task_list 2666 9.0 104.633 104.633 104.633 104.633 mp2_ri_gpw_compute_en 1 5.0 0.086 0.086 51.066 51.066 mp2_ri_gpw_compute_en_RI_loop 1 6.0 11.613 11.613 48.984 48.984 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.580 2.580 28.634 28.634 local_gemm 2080 8.0 26.054 26.054 26.054 26.054 dbcsr_multiply_generic 5322 8.0 0.182 0.182 21.166 21.166 ao_to_mo_and_store_B_mult_1 2656 7.0 0.010 0.010 21.144 21.144 fft_wrap_pw1pw2 53228 10.4 0.122 0.122 12.318 12.318 calculate_wavefunction 2656 8.0 7.913 7.913 11.776 11.776 multiply_cannon 5322 9.0 0.480 0.480 10.690 10.690 multiply_cannon_loop 5322 10.0 0.162 0.162 9.342 9.342 fft_wrap_pw1pw2_20 21271 11.4 1.014 1.014 8.882 8.882 get_2c_integrals 1 6.0 0.000 0.000 8.817 8.817 make_m2s 10644 9.0 0.059 0.059 8.323 8.323 compute_2c_integrals 1 7.0 0.005 0.005 8.277 8.277 compute_2c_integrals_loop_lm 1 8.0 0.013 0.013 8.266 8.266 mp2_eri_2c_integrate_gpw 1 9.0 0.847 0.847 8.253 8.253 make_images 10644 10.0 3.232 3.232 7.933 7.933 multiply_cannon_multrec 5322 11.0 7.715 7.715 7.754 7.754 fft3d_s 53229 12.4 6.978 6.978 7.063 7.063 mp2_ri_gpw_compute_en_ener 2080 7.0 6.375 6.375 6.375 6.375 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.436 2.436 6.314 6.314 potential_pw2rs 5322 10.0 0.145 0.145 4.393 4.393 copy_dbcsr_to_fm 2679 8.0 0.027 0.027 4.327 4.327 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.035 36.934 36.947 qs_energies 1 2.0 0.000 0.000 36.548 36.548 mp2_main 1 3.0 0.000 0.001 34.635 34.636 mp2_gpw_main 1 4.0 0.001 0.002 34.539 34.540 mp2_ri_gpw_compute_in 1 5.0 0.046 0.046 18.056 18.795 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 16.830 17.572 mp2_ri_gpw_compute_en 1 5.0 0.282 0.290 16.420 16.754 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.004 14.769 14.905 integrate_v_rspace 93 8.1 0.100 0.122 14.653 14.791 mp2_ri_gpw_compute_en_RI_loop 1 6.0 2.124 2.482 14.784 14.790 grid_integrate_task_list 93 9.1 14.370 14.512 14.370 14.512 mp2_ri_gpw_compute_en_expansio 65 7.0 0.091 0.110 10.110 10.254 local_gemm 65 8.0 10.019 10.145 10.019 10.145 mp2_ri_gpw_compute_en_comm 17 7.0 0.090 0.144 2.255 3.063 mp_sendrecv_dm3 1054 8.0 1.728 2.782 1.728 2.782 dbcsr_multiply_generic 176 8.0 0.007 0.009 1.757 2.434 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.742 2.418 scf_env_do_scf 1 3.0 0.000 0.000 1.775 1.775 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 1.775 1.775 make_m2s 352 9.0 0.003 0.003 0.715 1.233 make_images 352 10.0 0.050 0.054 0.703 1.220 get_2c_integrals 1 6.0 0.005 0.013 1.165 1.182 multiply_cannon 176 9.0 0.017 0.019 1.007 1.158 mp_min_d 2 7.0 0.767 1.128 0.767 1.128 multiply_cannon_loop 176 10.0 0.002 0.002 0.952 1.099 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 0.742 1.076 multiply_cannon_multrec 246 11.0 0.819 0.893 0.824 0.900 compute_2c_integrals 1 7.0 0.003 0.004 0.858 0.867 qs_scf_new_mos 10 5.0 0.000 0.000 0.826 0.830 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 0.769 0.793 mp2_eri_2c_integrate_gpw 1 9.0 0.213 0.215 0.768 0.792 eigensolver 11 5.8 0.001 0.001 0.783 0.784 hybrid_alltoall_any 458 11.4 0.359 0.726 0.376 0.748 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=55.69900000000001, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=104.633, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=26.054, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=11.613, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=7.913, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.715, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.873999999999999, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.37, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=10.019, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=2.124, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.819, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.728, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.099 0.099 135.864 135.864 qs_energies 1 2.0 0.000 0.000 134.485 134.485 scf_env_do_scf 1 3.0 0.000 0.000 126.906 126.906 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 126.906 126.906 qs_ks_update_qs_env 15 5.0 0.000 0.000 56.958 56.958 rebuild_ks_matrix 15 6.0 0.000 0.000 56.732 56.732 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 56.732 56.732 qs_scf_new_mos 15 5.0 0.000 0.000 44.314 44.314 qs_vxc_create 15 8.0 0.000 0.000 39.462 39.462 eigensolver 15 6.0 0.002 0.002 36.558 36.558 calculate_dispersion_nonloc 15 9.0 6.671 6.671 34.192 34.192 fft_wrap_pw1pw2 1086 10.0 0.016 0.016 29.432 29.432 cp_fm_diag_elpa 15 7.0 0.000 0.000 22.739 22.739 cp_fm_diag_elpa_base 15 8.0 20.242 20.242 22.738 22.738 qs_rho_update_rho_low 16 5.0 0.000 0.000 21.840 21.840 calculate_rho_elec 16 6.0 0.220 0.220 21.840 21.840 fft_wrap_pw1pw2_150 765 11.0 6.900 6.900 21.539 21.539 grid_collocate_task_list 16 7.0 20.119 20.119 20.119 20.119 sum_up_and_integrate 15 8.0 0.000 0.000 16.117 16.117 integrate_v_rspace 15 9.0 0.022 0.022 16.088 16.088 grid_integrate_task_list 15 10.0 15.351 15.351 15.351 15.351 cp_fm_cholesky_restore 45 7.0 11.482 11.482 11.482 11.482 fft3d_s 1087 12.0 11.111 11.111 11.119 11.119 fft_wrap_pw1pw2_200 197 11.3 1.774 1.774 7.666 7.666 pw_scatter_s 585 12.1 7.618 7.618 7.618 7.618 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.603 5.603 dbcsr_complete_redistribute 46 8.3 2.083 2.083 5.398 5.398 xc_vxc_pw_create 15 9.0 0.207 0.207 5.269 5.269 gspace_mixing 14 5.0 0.171 0.171 5.008 5.008 cp_fm_upper_to_full 30 8.0 4.832 4.832 4.832 4.832 vdW_energy 15 10.0 4.527 4.527 4.527 4.527 broyden_mixing 14 6.0 4.312 4.312 4.312 4.312 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.292 4.292 xc_pw_derive 90 11.0 0.001 0.001 3.762 3.762 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 3.056 3.056 init_scf_run 1 3.0 0.000 0.000 2.808 2.808 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.034 65.715 65.727 qs_energies 1 2.0 0.000 0.000 65.354 65.361 scf_env_do_scf 1 3.0 0.000 0.001 60.725 60.726 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 60.725 60.726 qs_ks_update_qs_env 15 5.0 0.000 0.000 26.769 26.778 rebuild_ks_matrix 15 6.0 0.000 0.000 26.721 26.730 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 26.721 26.730 qs_rho_update_rho_low 16 5.0 0.000 0.000 20.270 20.277 calculate_rho_elec 16 6.0 0.007 0.007 20.270 20.277 grid_collocate_task_list 16 7.0 18.990 19.144 18.990 19.144 sum_up_and_integrate 15 8.0 0.000 0.000 15.674 15.714 integrate_v_rspace 15 9.0 0.001 0.001 15.664 15.707 grid_integrate_task_list 15 10.0 14.768 14.908 14.768 14.908 qs_scf_new_mos 15 5.0 0.000 0.001 14.219 14.257 eigensolver 15 6.0 0.002 0.002 13.021 13.040 qs_vxc_create 15 8.0 0.001 0.001 10.567 10.578 cp_fm_diag_elpa 15 7.0 0.000 0.000 9.226 9.237 cp_fm_diag_elpa_base 15 8.0 9.062 9.086 9.216 9.219 calculate_dispersion_nonloc 15 9.0 0.959 0.970 8.418 8.437 fft_wrap_pw1pw2 1086 10.0 0.021 0.022 7.927 7.977 fft3d_ps 1086 12.0 2.905 3.093 5.916 6.026 fft_wrap_pw1pw2_150 765 11.0 0.141 0.153 5.067 5.089 cp_fm_cholesky_restore 45 7.0 3.619 3.683 3.619 3.683 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.915 2.915 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.599 2.810 fft_wrap_pw1pw2_200 197 11.3 0.118 0.126 2.712 2.759 mp_alltoall_z22v 1086 14.0 2.323 2.622 2.323 2.622 xc_vxc_pw_create 15 9.0 0.018 0.024 2.148 2.176 build_core_ppnl 1 5.0 1.614 1.783 1.614 1.783 x_to_yz 585 13.1 0.367 0.384 1.601 1.734 yz_to_x 501 12.9 0.297 0.358 1.386 1.558 xc_pw_derive 90 11.0 0.001 0.001 1.466 1.543 vdW_energy 15 10.0 1.457 1.513 1.457 1.513 init_scf_run 1 3.0 0.000 0.001 1.479 1.480 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.343 1.343 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=57.559, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=20.242, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.119, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.351, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.482, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.111, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=16.371000000000002, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=9.062, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=18.99, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.768, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.619, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.905, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.079 0.079 346.581 346.581 qs_energies 1 2.0 0.000 0.000 346.439 346.439 ls_scf 1 3.0 0.000 0.000 344.984 344.984 ls_scf_main 1 4.0 0.002 0.002 334.136 334.136 density_matrix_trs4 11 5.0 0.012 0.012 244.121 244.121 arnoldi_extremal 12 6.1 0.000 0.000 156.180 156.180 arnoldi_normal_ev 12 7.1 0.047 0.047 156.180 156.180 dbcsr_matrix_vector_mult 652 9.0 0.236 0.236 153.713 153.713 build_subspace 23 8.1 0.084 0.084 153.423 153.423 dbcsr_matrix_vector_mult_local 652 10.0 151.839 151.839 151.849 151.849 ls_scf_dm_to_ks 11 5.0 0.000 0.000 84.203 84.203 matrix_ls_to_qs 11 6.0 0.000 0.000 80.764 80.764 dbcsr_multiply_generic 185 6.1 0.824 0.824 76.058 76.058 multiply_cannon 185 7.1 0.443 0.443 44.188 44.188 dbcsr_copy_into_existing 11 7.0 42.762 42.762 42.763 42.763 dbcsr_complete_redistribute 23 7.5 30.596 30.596 42.169 42.169 matrix_decluster 11 7.0 0.000 0.000 38.001 38.001 multiply_cannon_loop 185 8.1 0.222 0.222 31.651 31.651 make_m2s 370 7.1 0.039 0.039 27.054 27.054 make_images 370 8.1 11.775 11.775 25.035 25.035 multiply_cannon_multrec 185 9.1 22.934 22.934 22.975 22.975 dbcsr_finalize 646 7.5 0.326 0.326 15.683 15.683 dbcsr_merge_all 597 8.5 2.361 2.361 14.254 14.254 setup_rec_index_2d 370 8.1 11.920 11.920 11.920 11.920 dbcsr_sort_indices 805 10.1 11.582 11.582 11.582 11.582 tree_to_linear_d 110 9.4 10.519 10.519 10.519 10.519 ls_scf_init_scf 1 4.0 0.000 0.000 10.143 10.143 quick_finalize 395 10.0 0.372 0.372 9.971 9.971 ls_scf_init_matrix_S 1 5.0 0.000 0.000 9.672 9.672 dbcsr_special_finalize 370 9.1 0.002 0.002 9.136 9.136 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 8.712 8.712 calculate_norms 370 9.1 8.453 8.453 8.453 8.453 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.033 76.575 76.587 qs_energies 1 2.0 0.000 0.000 76.448 76.448 ls_scf 1 3.0 0.000 0.000 76.393 76.394 ls_scf_main 1 4.0 0.001 0.011 73.435 73.436 density_matrix_trs4 11 5.0 0.007 0.022 70.651 70.728 dbcsr_multiply_generic 185 6.1 0.064 0.073 67.064 67.249 multiply_cannon 185 7.1 0.037 0.039 55.533 56.214 multiply_cannon_loop 185 8.1 0.129 0.139 52.859 54.144 multiply_cannon_multrec 1480 9.1 31.844 33.774 32.211 34.135 mp_waitall_1 11936 10.3 17.724 20.105 17.724 20.105 multiply_cannon_metrocomm3 1480 9.1 0.016 0.017 13.632 16.243 make_m2s 370 7.1 0.039 0.041 7.607 7.669 make_images 370 8.1 0.662 0.696 7.465 7.526 calculate_norms 2960 9.1 5.802 6.097 5.802 6.097 make_images_data 370 9.1 0.011 0.014 3.361 3.656 mp_sum_l 1199 5.3 2.935 3.543 2.935 3.543 hybrid_alltoall_any 393 9.9 0.263 1.524 2.990 3.185 arnoldi_extremal 12 6.1 0.000 0.001 2.712 2.724 arnoldi_normal_ev 12 7.1 0.002 0.008 2.711 2.723 build_subspace 23 8.1 0.022 0.028 2.610 2.612 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 2.071 2.518 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.390 2.469 dbcsr_complete_redistribute 23 7.5 1.287 1.332 2.190 2.266 dbcsr_matrix_vector_mult 652 9.0 0.015 0.055 2.171 2.238 ls_scf_init_scf 1 4.0 0.000 0.000 2.215 2.216 matrix_ls_to_qs 11 6.0 0.000 0.000 2.116 2.196 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.190 2.195 make_images_pack 370 9.1 1.916 2.094 1.920 2.097 matrix_decluster 11 7.0 0.000 0.000 1.977 2.058 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.002 1.984 1.988 multiply_cannon_metrocomm1 1480 9.1 0.006 0.007 1.046 1.823 dbcsr_matrix_vector_mult_local 652 10.0 1.737 1.798 1.739 1.800 buffer_matrices_ensure_size 370 8.1 1.550 1.663 1.550 1.663 dbcsr_finalize 646 7.5 0.008 0.012 1.454 1.549 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=78.07700000000006, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=151.839, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=42.762, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=30.596, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=22.934, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=11.92, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=8.453, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=13.329999999999998, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.737, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.287, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=31.844, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.802, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.935, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=17.724, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.916, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 70.226 70.226 lib_test 1 2.0 0.000 0.000 70.209 70.209 dbcsr_run_tests 3 3.0 0.002 0.002 70.209 70.209 test_multiplies_multiproc 3 4.0 0.001 0.001 54.096 54.096 dbcsr_redistribute 9 5.0 34.904 34.904 36.502 36.502 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.198 16.198 dbcsr_make_random_matrix 9 4.0 12.453 12.453 16.002 16.002 multiply_cannon 9 6.0 0.002 0.002 11.408 11.408 multiply_cannon_loop 9 7.0 0.018 0.018 11.055 11.055 multiply_cannon_multrec 9 8.0 11.037 11.037 11.037 11.037 dbcsr_finalize 27 5.7 0.015 0.015 6.026 6.026 dbcsr_merge_all 18 6.5 2.024 2.024 5.322 5.322 dbcsr_data_release 975 7.6 2.714 2.714 2.714 2.714 tree_to_linear_d 9 7.0 1.933 1.933 1.933 1.933 make_m2s 18 6.0 0.001 0.001 1.702 1.702 make_images 18 7.0 0.603 0.603 1.657 1.657 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.018 20.849 20.855 lib_test 1 2.0 0.000 0.000 20.811 20.832 dbcsr_run_tests 3 3.0 0.000 0.001 20.806 20.827 test_multiplies_multiproc 3 4.0 0.000 0.002 19.926 19.989 dbcsr_multiply_generic 9 5.0 0.001 0.001 18.389 18.486 multiply_cannon 9 6.0 0.002 0.002 16.096 16.565 multiply_cannon_loop 9 7.0 0.002 0.002 15.772 16.234 multiply_cannon_multrec 72 8.0 12.060 12.357 12.061 12.357 mp_waitall_1 576 9.2 4.089 4.705 4.089 4.705 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 3.604 4.156 mp_sum_l 470 2.5 0.927 1.447 0.927 1.447 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.846 1.365 dbcsr_make_random_matrix 9 4.0 0.688 0.707 0.843 0.868 make_m2s 18 6.0 0.001 0.001 0.797 0.841 make_images 18 7.0 0.021 0.025 0.794 0.838 dbcsr_data_release 444 7.6 0.638 0.782 0.638 0.782 dbcsr_destroy 111 5.9 0.002 0.056 0.556 0.706 dbcsr_finalize 27 5.7 0.000 0.000 0.604 0.673 dbcsr_redistribute 9 5.0 0.255 0.293 0.587 0.616 dbcsr_merge_all 18 6.5 0.094 0.112 0.518 0.578 make_images_data 18 8.0 0.001 0.001 0.426 0.497 hybrid_alltoall_any 18 9.0 0.040 0.177 0.368 0.450 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=7.094000000000001, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=34.904, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.453, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.037, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.714, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.024, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.0980000000000025, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.255, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.688, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.06, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.638, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.094, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.927, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.089, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.055 0.055 128.833 128.833 qs_mol_dyn_low 1 2.0 0.004 0.004 127.476 127.476 velocity_verlet 5 3.0 0.003 0.003 103.564 103.564 qmmm_el_coupling 6 3.8 0.000 0.000 82.814 82.814 qmmm_elec_with_gaussian 6 4.8 0.012 0.012 82.810 82.810 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 82.203 82.203 qmmm_elec_gaussian_low_G 6 6.8 81.340 81.340 81.340 81.340 qs_forces 6 3.8 0.001 0.001 36.377 36.377 qs_energies 6 4.8 0.000 0.000 32.399 32.399 scf_env_do_scf 6 5.8 0.001 0.001 30.014 30.014 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 26.116 26.116 rebuild_ks_matrix 45 8.4 0.000 0.000 25.084 25.084 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 25.084 25.084 qs_ks_update_qs_env 45 7.8 0.000 0.000 21.527 21.527 fft_wrap_pw1pw2 801 12.6 0.010 0.010 20.244 20.244 fft_wrap_pw1pw2_150 507 14.2 5.190 5.190 19.798 19.798 qs_vxc_create 45 10.4 0.001 0.001 15.365 15.365 xc_vxc_pw_create 45 11.4 0.682 0.682 15.365 15.365 xc_pw_derive 270 13.4 0.002 0.002 11.516 11.516 fft3d_s 802 14.6 7.991 7.991 7.999 7.999 xc_pw_divergence 45 12.4 0.001 0.001 7.660 7.660 qs_rho_update_rho_low 45 7.9 0.000 0.000 7.552 7.552 calculate_rho_elec 45 8.9 0.565 0.565 7.552 7.552 xc_rho_set_and_dset_create 45 12.4 0.586 0.586 6.960 6.960 pw_scatter_s 429 14.8 5.427 5.427 5.427 5.427 qmmm_forces 6 3.8 0.002 0.002 5.139 5.139 qmmm_forces_with_gaussian 6 4.8 0.016 0.016 4.817 4.817 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.114 4.114 init_scf_loop 6 6.8 0.000 0.000 3.893 3.893 sum_up_and_integrate 45 10.4 0.000 0.000 3.814 3.814 integrate_v_rspace 45 11.4 0.012 0.012 3.783 3.783 density_rs2pw 45 9.9 0.002 0.002 3.770 3.770 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.564 3.564 qs_ks_ddapc 45 10.4 0.001 0.001 3.431 3.431 qmmm_forces_gaussian_low_G 6 6.8 3.427 3.427 3.427 3.427 grid_collocate_task_list 45 9.9 3.216 3.216 3.216 3.216 pw_integral_ab 2539 7.4 2.803 2.803 2.803 2.803 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.050 59.275 59.288 qs_mol_dyn_low 1 2.0 0.003 0.007 58.192 58.253 qs_forces 6 3.8 0.001 0.001 43.408 43.408 qs_energies 6 4.8 0.000 0.004 41.499 41.499 scf_env_do_scf 6 5.8 0.000 0.001 40.406 40.406 scf_env_do_scf_inner_loop 113 6.2 0.002 0.016 38.787 38.788 rebuild_ks_matrix 119 8.1 0.000 0.000 27.330 27.343 qs_ks_build_kohn_sham_matrix 119 9.1 0.015 0.017 27.330 27.343 qs_ks_update_qs_env 119 7.3 0.001 0.001 25.703 25.716 velocity_verlet 5 3.0 0.002 0.004 23.418 23.421 fft_wrap_pw1pw2 2059 12.4 0.041 0.045 16.884 17.338 fft_wrap_pw1pw2_150 1321 13.9 0.668 0.716 16.167 16.623 qs_vxc_create 119 10.1 0.002 0.002 13.563 13.577 xc_vxc_pw_create 119 11.1 0.154 0.218 13.561 13.576 fft3d_ps 2059 14.4 7.205 8.088 12.483 13.156 xc_pw_derive 714 13.1 0.013 0.015 10.221 10.501 qs_rho_update_rho_low 119 7.3 0.001 0.001 10.427 10.428 calculate_rho_elec 119 8.3 0.049 0.056 10.426 10.427 sum_up_and_integrate 119 10.1 0.002 0.002 9.445 9.501 integrate_v_rspace 119 11.1 0.003 0.004 9.392 9.441 qmmm_forces 6 3.8 0.002 0.002 7.445 7.445 qmmm_forces_with_gaussian 6 4.8 0.008 0.009 7.149 7.286 xc_pw_divergence 119 12.1 0.005 0.007 6.630 6.872 xc_rho_set_and_dset_create 119 12.1 0.366 0.449 6.568 6.751 qmmm_el_coupling 6 3.8 0.000 0.000 6.523 6.592 qmmm_elec_with_gaussian 6 4.8 0.003 0.003 6.521 6.591 density_rs2pw 119 9.3 0.006 0.007 5.980 6.338 potential_pw2rs 119 12.1 0.006 0.007 5.474 5.500 grid_collocate_task_list 119 9.3 4.311 4.625 4.311 4.625 qs_scf_new_mos 113 7.2 0.000 0.000 4.541 4.549 qs_scf_loop_do_ot 113 8.2 0.000 0.000 4.541 4.549 ot_scf_mini 113 9.2 0.001 0.001 4.453 4.460 dbcsr_multiply_generic 2588 12.3 0.058 0.062 4.308 4.359 mp_alltoall_z22v 2059 16.4 3.765 4.188 3.765 4.188 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.848 3.993 grid_integrate_task_list 119 12.1 3.746 3.841 3.746 3.841 transfer_pw2rs 500 12.8 0.006 0.007 3.769 3.807 ot_mini 113 10.2 0.000 0.001 3.486 3.494 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.292 3.323 qmmm_forces_gaussian_low_G 6 6.8 3.168 3.314 3.168 3.314 transfer_rs2pw 488 10.2 0.007 0.010 2.872 3.214 x_to_yz 1095 15.8 0.834 0.969 2.869 3.088 mp_waitany 4028 12.8 2.512 3.008 2.512 3.008 qs_ot_get_derivative 113 11.2 0.001 0.001 2.850 2.874 pw_restrict_s3 18 5.8 1.394 1.440 2.734 2.820 qmmm_elec_gaussian_low_G 6 6.8 2.706 2.739 2.706 2.739 yz_to_x 964 15.0 0.623 0.905 2.353 2.658 transfer_pw2rs_150 125 13.9 0.934 1.006 2.477 2.548 transfer_rs2pw_150 125 11.2 0.673 0.863 2.013 2.332 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.219 2.276 pw_prolongate_s3 18 6.8 1.171 1.199 2.218 2.276 qs_ot_get_derivative_diag 85 12.0 0.001 0.001 2.052 2.065 qs_ks_ddapc 119 10.1 0.002 0.003 1.972 2.024 pw_scatter_p 1095 14.8 1.684 1.717 1.684 1.717 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.637 1.638 init_scf_loop 6 6.8 0.000 0.000 1.615 1.615 pw_integral_ab 2761 7.7 1.269 1.324 1.470 1.557 mp_sum_l 12805 13.0 1.422 1.548 1.422 1.548 pw_gather_p 964 14.0 1.281 1.512 1.281 1.512 mp_sum_dm3 33 5.7 1.466 1.507 1.466 1.507 pw_derive 1089 13.4 1.404 1.469 1.404 1.469 pw_poisson_solve 125 9.9 0.003 0.003 1.391 1.412 pw_copy 2027 12.4 1.301 1.358 1.301 1.358 mp_waitall_1 177795 16.4 1.260 1.340 1.260 1.340 make_m2s 5176 13.3 0.040 0.041 1.126 1.212 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=22.24199999999999, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=81.34, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.991, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.427, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=5.19, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.427, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.216, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=33.706, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.706, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.668, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.168, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.311, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.765, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.205, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.746, yerr=0.0 Summary: Performance test took 36 minutes. Status: OK Removing intermediate container e38613649f3a ---> b5670dae2d54 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 3b08f92687c8 Removing intermediate container 3b08f92687c8 ---> 24fd2e459704 Step 42/42 : ENTRYPOINT [] ---> Running in 2444919e7d1a Removing intermediate container 2444919e7d1a ---> 4af0343f7263 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 4af0343f7263 Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2024-04-24 08:52:58+00:00