StartDate: 2024-03-27 08:40:00+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: f7c1873a57b794a8507411d5b3e8a948e840a505 CommitTime: 2024-03-25 15:09:43 +0100 CommitAuthor: Max Graml CommitSubject: BSE: Fix conventions #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=f7c1873a57b794a8507411d5b3e8a948e840a505 Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 394.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu bccd10f490ab: Pulling fs layer bccd10f490ab: Verifying Checksum bccd10f490ab: Download complete bccd10f490ab: Pull complete Digest: sha256:77906da86b60585ce12215807090eb327e7386c8fafb5402369e421f44eff17e Status: Downloaded newer image for ubuntu:22.04 ---> ca2b0f26964c Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 03d16d9e2c6a Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 72223eb0b8d0 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 432bb79a897a Step 5/42 : RUN mkdir scripts ---> Using cache ---> 1bac2e842a38 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 94fa08efe2f0 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> a9437683062a Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> 12ff9cf83aed Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 6e9f97374a01 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> a08edad7e3e5 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 63778cccdf7f Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> e538b9780c22 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 0e32b0cd01a2 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> a06fd154f6ad Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 7db7df61870f Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 77a4daa79489 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 4a309c10153c Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 75034c85a6d6 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> c60a8852e081 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 0b1a3154e422 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> ebc0b9070a55 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> f9e582c35689 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 4878a7be96db Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 78be0b1c2070 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 6a512e3146fa Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 54e203058653 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> dd48e4a1468d Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 14e720b3ec10 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> a93ba70bc5e5 Step 30/42 : COPY ./Makefile . ---> Using cache ---> fc53b07bd5f8 Step 31/42 : COPY ./src ./src ---> Using cache ---> bc8054048ab5 Step 32/42 : COPY ./exts ./exts ---> Using cache ---> f5e10207fd68 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> c5f637015b49 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/" ---> Running in 3b36a169275a './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Removing intermediate container 3b36a169275a ---> beb72161f50a Step 35/42 : COPY ./data ./data ---> ce9a1eb2b855 Step 36/42 : COPY ./tests ./tests ---> e08fdd1d34c8 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> d8e23c4a58f5 Step 38/42 : COPY ./benchmarks ./benchmarks ---> 6ea605d2980a Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> f9159ce222e4 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 8a97200dcde9 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 75 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 86.283 86.283 qs_mol_dyn_low 1 2.0 0.003 0.003 85.710 85.710 qs_forces 11 3.9 0.001 0.001 85.668 85.668 qs_energies 11 4.9 0.001 0.001 79.458 79.458 scf_env_do_scf 11 5.9 0.002 0.002 68.427 68.427 velocity_verlet 10 3.0 0.002 0.002 55.277 55.277 scf_env_do_scf_inner_loop 108 6.5 0.012 0.012 54.812 54.812 qs_scf_new_mos 108 7.5 0.001 0.001 20.566 20.566 qs_scf_loop_do_ot 108 8.5 0.001 0.001 20.565 20.565 rebuild_ks_matrix 119 8.3 0.001 0.001 20.539 20.539 qs_ks_build_kohn_sham_matrix 119 9.3 0.013 0.013 20.538 20.538 dbcsr_multiply_generic 2286 12.5 0.159 0.159 20.418 20.418 qs_rho_update_rho_low 119 7.7 0.001 0.001 19.514 19.514 calculate_rho_elec 119 8.7 0.949 0.949 19.513 19.513 ot_scf_mini 108 9.5 0.002 0.002 19.280 19.280 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.634 18.634 grid_collocate_task_list 119 9.7 14.810 14.810 14.810 14.810 init_scf_loop 11 6.9 0.000 0.000 13.391 13.391 sum_up_and_integrate 119 10.3 0.001 0.001 13.306 13.306 integrate_v_rspace 119 11.3 0.087 0.087 13.249 13.249 ot_mini 108 10.5 0.001 0.001 12.558 12.558 grid_integrate_task_list 119 12.3 10.858 10.858 10.858 10.858 prepare_preconditioner 11 7.9 0.000 0.000 10.724 10.724 make_preconditioner 11 8.9 0.000 0.000 10.723 10.723 make_m2s 4572 13.5 0.045 0.045 10.638 10.638 make_full_inverse_cholesky 11 9.9 0.022 0.022 9.609 9.609 qs_ot_get_derivative 108 11.5 0.001 0.001 6.914 6.914 fft_wrap_pw1pw2 1201 11.6 0.009 0.009 6.669 6.669 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.439 6.439 multiply_cannon 2286 13.5 0.199 0.199 5.849 5.849 fft_wrap_pw1pw2_140 487 12.2 0.973 0.973 5.687 5.687 ot_diis_step 108 11.5 0.003 0.003 5.641 5.641 dbcsr_make_dense_low 5837 15.5 0.077 0.077 5.627 5.627 make_dense_data 5837 16.5 5.031 5.031 5.536 5.536 make_images 4572 14.5 2.274 2.274 5.443 5.443 multiply_cannon_loop 2286 14.5 0.058 0.058 5.316 5.316 multiply_cannon_multrec 2286 15.5 5.214 5.214 5.257 5.257 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.204 5.204 apply_single 119 13.6 0.000 0.000 5.203 5.203 dbcsr_make_images_dense 3978 14.8 0.017 0.017 4.870 4.870 density_rs2pw 119 9.7 0.004 0.004 3.754 3.754 init_scf_run 11 5.9 0.003 0.003 3.724 3.724 scf_env_initial_rho_setup 11 6.9 0.002 0.002 3.721 3.721 dbcsr_complete_redistribute 329 12.2 1.743 1.743 3.603 3.603 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.598 3.598 dbcsr_copy 2102 12.0 0.231 0.231 3.480 3.480 qs_ot_get_p 119 10.4 0.001 0.001 3.375 3.375 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.345 3.345 wfi_extrapolate 11 7.9 0.001 0.001 3.244 3.244 dbcsr_copy_into_existing 22 7.9 3.190 3.190 3.190 3.190 qs_create_task_list 11 7.9 0.000 0.000 3.188 3.188 generate_qs_task_list 11 8.9 1.945 1.945 3.188 3.188 copy_dbcsr_to_fm 153 11.3 0.002 0.002 3.054 3.054 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.863 2.863 fft3d_s 1202 13.6 2.853 2.853 2.858 2.858 cp_fm_cholesky_invert 11 10.9 2.799 2.799 2.799 2.799 cp_fm_cholesky_decompose 22 10.9 2.708 2.708 2.708 2.708 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.420 2.420 potential_pw2rs 119 12.3 0.047 0.047 2.303 2.303 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.249 2.249 pw_poisson_solve 119 10.3 0.002 0.002 2.136 2.136 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.112 2.112 qs_ot_p2m_diag 50 11.0 0.154 0.154 2.034 2.034 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 2.013 2.013 dbcsr_data_release 279532 16.0 1.989 1.989 1.989 1.989 dbcsr_finalize 5186 13.8 0.130 0.130 1.944 1.944 copy_fm_to_dbcsr 176 11.2 0.001 0.001 1.791 1.791 cp_fm_upper_to_full 72 14.2 1.764 1.764 1.764 1.764 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.028 46.456 46.468 qs_mol_dyn_low 1 2.0 0.003 0.004 46.327 46.332 qs_forces 11 3.9 0.001 0.002 46.284 46.284 qs_energies 11 4.9 0.001 0.001 43.276 43.277 scf_env_do_scf 11 5.9 0.000 0.003 39.737 39.737 scf_env_do_scf_inner_loop 108 6.5 0.003 0.022 36.561 36.561 velocity_verlet 10 3.0 0.001 0.003 27.548 27.549 rebuild_ks_matrix 119 8.3 0.000 0.001 16.905 16.964 qs_ks_build_kohn_sham_matrix 119 9.3 0.016 0.017 16.905 16.963 qs_ks_update_qs_env 119 7.6 0.001 0.002 15.021 15.061 dbcsr_multiply_generic 2286 12.5 0.073 0.074 14.144 14.248 qs_rho_update_rho_low 119 7.7 0.001 0.001 12.782 12.788 calculate_rho_elec 119 8.7 0.030 0.032 12.781 12.788 sum_up_and_integrate 119 10.3 0.002 0.003 12.466 12.489 integrate_v_rspace 119 11.3 0.004 0.004 12.443 12.466 qs_scf_new_mos 108 7.5 0.001 0.001 11.466 11.519 qs_scf_loop_do_ot 108 8.5 0.001 0.001 11.465 11.519 ot_scf_mini 108 9.5 0.002 0.002 10.788 10.846 multiply_cannon 2286 13.5 0.138 0.147 10.519 10.691 multiply_cannon_loop 2286 14.5 0.116 0.126 9.937 10.143 grid_collocate_task_list 119 9.7 9.009 9.352 9.009 9.352 grid_integrate_task_list 119 12.3 8.596 8.874 8.596 8.874 mp_waitall_1 158411 16.6 7.548 7.922 7.548 7.922 ot_mini 108 10.5 0.001 0.001 6.306 6.367 multiply_cannon_metrocomm3 18288 15.5 0.056 0.059 5.846 6.109 multiply_cannon_multrec 18288 15.5 3.657 3.758 3.669 3.769 density_rs2pw 119 9.7 0.005 0.006 3.375 3.758 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.245 3.315 apply_single 119 13.6 0.000 0.000 3.245 3.315 fft_wrap_pw1pw2 1201 11.6 0.017 0.020 3.233 3.285 potential_pw2rs 119 12.3 0.007 0.007 3.260 3.267 qs_ot_get_derivative 108 11.5 0.001 0.001 3.178 3.233 init_scf_loop 11 6.9 0.000 0.000 3.160 3.161 ot_diis_step 108 11.5 0.003 0.004 3.112 3.112 fft_wrap_pw1pw2_140 487 12.2 0.146 0.152 2.740 2.804 make_m2s 4572 13.5 0.047 0.056 2.535 2.579 init_scf_run 11 5.9 0.000 0.006 2.484 2.487 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.483 2.487 fft3d_ps 1201 13.6 1.219 1.277 2.315 2.382 transfer_pw2rs 487 13.2 0.006 0.007 2.378 2.382 wfi_extrapolate 11 7.9 0.001 0.001 2.263 2.263 make_images 4572 14.5 0.117 0.126 2.191 2.234 transfer_rs2pw 487 10.6 0.007 0.008 1.821 2.226 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.023 2.041 mp_waitany 9880 13.7 1.386 1.815 1.386 1.815 qs_ot_get_p 119 10.4 0.001 0.001 1.476 1.529 transfer_rs2pw_140 130 11.5 0.209 0.234 1.008 1.411 prepare_preconditioner 11 7.9 0.000 0.000 1.327 1.342 make_preconditioner 11 8.9 0.000 0.000 1.327 1.342 transfer_pw2rs_140 130 13.9 0.430 0.465 1.269 1.306 make_images_data 4572 15.5 0.040 0.047 1.214 1.294 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.208 1.221 hybrid_alltoall_any 4725 16.4 0.073 0.210 1.081 1.177 mp_alltoall_d11v 2130 13.8 0.879 1.147 0.879 1.147 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.955 0.986 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.948 0.976 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.961 0.962 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.882 0.957 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=47.18000000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=14.81, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.858, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.214, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.031, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=3.19, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=16.260000000000005, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.009, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.596, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.657, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.386, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.548, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.039 0.039 109.925 109.925 qs_mol_dyn_low 1 2.0 0.003 0.003 109.261 109.261 qs_forces 11 3.9 0.001 0.001 109.220 109.220 qs_energies 11 4.9 0.001 0.001 101.686 101.686 scf_env_do_scf 11 5.9 0.001 0.001 88.840 88.840 scf_env_do_scf_inner_loop 96 6.5 0.011 0.011 73.373 73.373 velocity_verlet 10 3.0 0.002 0.002 69.412 69.412 rebuild_ks_matrix 107 8.3 0.001 0.001 33.924 33.924 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 33.923 33.923 qs_rho_update_rho_low 107 7.7 0.000 0.000 31.351 31.351 calculate_rho_elec 107 8.7 0.857 0.857 31.350 31.350 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.583 30.583 grid_collocate_task_list 107 9.7 27.071 27.071 27.071 27.071 sum_up_and_integrate 107 10.3 0.001 0.001 26.588 26.588 integrate_v_rspace 107 11.3 0.105 0.105 26.556 26.556 grid_integrate_task_list 107 12.3 24.461 24.461 24.461 24.461 dbcsr_multiply_generic 1966 12.4 0.151 0.151 18.888 18.888 qs_scf_new_mos 96 7.5 0.000 0.000 18.605 18.605 qs_scf_loop_do_ot 96 8.5 0.001 0.001 18.605 18.605 ot_scf_mini 96 9.5 0.002 0.002 17.440 17.440 init_scf_loop 11 6.9 0.000 0.000 15.348 15.348 ot_mini 96 10.5 0.001 0.001 11.329 11.329 prepare_preconditioner 11 7.9 0.000 0.000 10.976 10.976 make_preconditioner 11 8.9 0.000 0.000 10.976 10.976 make_m2s 3932 13.4 0.040 0.040 9.960 9.960 make_full_inverse_cholesky 11 9.9 0.020 0.020 9.900 9.900 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.949 6.949 qs_ot_get_derivative 96 11.5 0.001 0.001 6.238 6.238 fft_wrap_pw1pw2 1081 11.6 0.008 0.008 6.017 6.017 multiply_cannon 1966 13.4 0.156 0.156 5.417 5.417 make_images 3932 14.4 2.128 2.128 5.221 5.221 init_scf_run 11 5.9 0.003 0.003 5.201 5.201 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.198 5.198 fft_wrap_pw1pw2_140 439 12.2 0.789 0.789 5.152 5.152 dbcsr_make_dense_low 4961 15.5 0.057 0.057 5.108 5.108 ot_diis_step 96 11.5 0.003 0.003 5.088 5.088 make_dense_data 4961 16.5 4.556 4.556 5.039 5.039 multiply_cannon_loop 1966 14.4 0.041 0.041 5.011 5.011 multiply_cannon_multrec 1966 15.4 4.926 4.926 4.969 4.969 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.757 4.757 apply_single 107 13.6 0.000 0.000 4.757 4.757 wfi_extrapolate 11 7.9 0.001 0.001 4.570 4.570 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.448 4.448 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.194 4.194 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.070 4.070 qs_create_task_list 11 7.9 0.000 0.000 3.644 3.644 generate_qs_task_list 11 8.9 2.381 2.381 3.644 3.644 dbcsr_copy 1855 11.9 0.217 0.217 3.509 3.509 density_rs2pw 107 9.7 0.003 0.003 3.423 3.423 dbcsr_complete_redistribute 317 12.2 1.720 1.720 3.395 3.395 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.338 3.338 dbcsr_copy_into_existing 22 7.9 3.214 3.214 3.215 3.215 qs_ot_get_p 107 10.4 0.001 0.001 3.059 3.059 cp_fm_cholesky_invert 11 10.9 3.045 3.045 3.045 3.045 copy_dbcsr_to_fm 147 11.2 0.002 0.002 2.809 2.809 cp_fm_cholesky_decompose 22 10.9 2.793 2.793 2.793 2.793 fft3d_s 1082 13.6 2.624 2.624 2.629 2.629 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.451 2.451 pw_poisson_solve 107 10.3 0.002 0.002 2.373 2.373 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.224 2.224 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.029 74.086 74.098 qs_mol_dyn_low 1 2.0 0.003 0.005 73.940 73.949 qs_forces 11 3.9 0.001 0.002 73.889 73.889 qs_energies 11 4.9 0.001 0.001 69.049 69.051 scf_env_do_scf 11 5.9 0.000 0.002 63.939 63.940 scf_env_do_scf_inner_loop 96 6.5 0.003 0.017 59.009 59.009 velocity_verlet 10 3.0 0.001 0.003 44.501 44.503 rebuild_ks_matrix 107 8.3 0.000 0.001 31.062 31.130 qs_ks_build_kohn_sham_matrix 107 9.3 0.014 0.016 31.062 31.129 qs_ks_update_qs_env 107 7.6 0.001 0.001 27.350 27.410 sum_up_and_integrate 107 10.3 0.002 0.002 26.854 26.877 integrate_v_rspace 107 11.3 0.004 0.004 26.832 26.857 qs_rho_update_rho_low 107 7.7 0.001 0.001 26.784 26.790 calculate_rho_elec 107 8.7 0.027 0.029 26.783 26.789 grid_integrate_task_list 107 12.3 22.723 23.070 22.723 23.070 grid_collocate_task_list 107 9.7 21.891 23.069 21.891 23.069 dbcsr_multiply_generic 1966 12.4 0.066 0.070 13.507 13.633 qs_scf_new_mos 96 7.5 0.001 0.001 10.710 10.747 qs_scf_loop_do_ot 96 8.5 0.001 0.001 10.709 10.747 multiply_cannon 1966 13.4 0.127 0.137 10.193 10.459 ot_scf_mini 96 9.5 0.002 0.002 10.129 10.173 multiply_cannon_loop 1966 14.4 0.105 0.111 9.665 9.902 mp_waitall_1 136719 16.5 7.469 7.794 7.469 7.794 multiply_cannon_metrocomm3 15728 15.4 0.049 0.052 5.827 6.145 ot_mini 96 10.5 0.001 0.001 6.018 6.066 density_rs2pw 107 9.7 0.005 0.005 4.520 5.231 init_scf_loop 11 6.9 0.000 0.000 4.914 4.914 init_scf_run 11 5.9 0.000 0.005 4.025 4.025 scf_env_initial_rho_setup 11 6.9 0.000 0.004 4.025 4.025 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.831 3.840 transfer_rs2pw 439 10.6 0.006 0.006 3.088 3.789 wfi_extrapolate 11 7.9 0.001 0.001 3.675 3.675 multiply_cannon_multrec 15728 15.4 3.449 3.662 3.460 3.674 mp_waitany 8968 13.7 2.708 3.440 2.708 3.440 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.185 3.245 apply_single 107 13.6 0.000 0.000 3.185 3.244 potential_pw2rs 107 12.3 0.006 0.007 3.172 3.179 ot_diis_step 96 11.5 0.003 0.003 3.063 3.064 fft_wrap_pw1pw2 1081 11.6 0.015 0.018 2.978 3.022 qs_ot_get_derivative 96 11.5 0.001 0.001 2.933 2.973 transfer_rs2pw_140 118 11.5 0.158 0.180 2.269 2.971 fft_wrap_pw1pw2_140 439 12.2 0.129 0.135 2.541 2.597 transfer_pw2rs 439 13.2 0.006 0.007 2.382 2.388 make_m2s 3932 13.4 0.041 0.049 2.276 2.337 fft3d_ps 1081 13.6 1.093 1.166 2.142 2.192 make_images 3932 14.4 0.105 0.109 1.974 2.034 mp_alltoall_d11v 1998 13.7 1.209 1.846 1.209 1.846 rs_gather_matrices 107 12.3 0.063 0.071 0.897 1.521 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=45.697, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=27.071, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.461, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.926, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.556, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=3.214, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=15.846000000000004, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.891, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.723, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.449, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.708, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.469, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.222 0.222 134.120 134.120 qs_energies 1 2.0 0.000 0.000 133.316 133.316 scf_env_do_scf 1 3.0 0.000 0.000 132.134 132.134 qs_ks_update_qs_env 8 5.0 0.000 0.000 127.459 127.459 rebuild_ks_matrix 7 6.0 0.000 0.000 127.403 127.403 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 127.403 127.403 hfx_ks_matrix 7 8.0 0.000 0.000 118.863 118.863 integrate_four_center 7 9.0 1.569 1.569 118.730 118.730 integrate_four_center_main 7 10.0 0.502 0.502 104.870 104.870 integrate_four_center_bin 453 11.0 104.368 104.368 104.368 104.368 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 75.371 75.371 init_scf_loop 1 4.0 0.000 0.000 56.755 56.755 integrate_four_center_load 7 10.0 0.001 0.001 12.005 12.005 hfx_load_balance 1 11.0 0.001 0.001 12.005 12.005 hfx_load_balance_bin 1 12.0 5.996 5.996 5.996 5.996 hfx_load_balance_count 1 12.0 5.953 5.953 5.953 5.953 qs_vxc_create 14 8.0 0.000 0.000 3.281 3.281 xc_vxc_pw_create 14 9.0 0.117 0.117 3.280 3.280 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.219 0.254 130.510 130.526 qs_energies 1 2.0 0.000 0.000 130.084 130.094 scf_env_do_scf 1 3.0 0.000 0.000 129.733 129.734 qs_ks_update_qs_env 8 5.0 0.000 0.000 127.509 127.510 rebuild_ks_matrix 7 6.0 0.000 0.000 127.500 127.501 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 127.500 127.501 hfx_ks_matrix 7 8.0 0.000 0.000 121.894 121.897 integrate_four_center 7 9.0 0.051 0.338 121.886 121.888 integrate_four_center_main 7 10.0 0.003 0.003 102.705 108.987 integrate_four_center_bin 448 11.0 102.702 108.984 102.702 108.984 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 76.976 76.977 init_scf_loop 1 4.0 0.000 0.000 52.755 52.755 mp_sync 56 11.2 6.595 12.752 6.595 12.752 integrate_four_center_load 7 10.0 0.000 0.000 11.909 11.909 hfx_load_balance 1 11.0 0.001 0.001 11.909 11.909 hfx_load_balance_bin 1 12.0 5.894 5.960 5.894 5.960 hfx_load_balance_count 1 12.0 5.881 5.952 5.881 5.952 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=15.51000000000002, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=104.368, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.996, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.953, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.569, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.502, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.222, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.164999999999992, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=102.702, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.894, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.881, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.051, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.003, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.219, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=6.595, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 78.317 78.317 qs_energies 1 2.0 0.000 0.000 77.944 77.944 mp2_main 1 3.0 0.000 0.000 74.921 74.921 mp2_gpw_main 1 4.0 0.000 0.000 74.817 74.817 rpa_ri_compute_en 1 5.0 0.000 0.000 71.297 71.297 rpa_num_int 1 6.0 0.001 0.001 71.291 71.291 compute_mat_P_omega 1 7.0 0.004 0.004 62.517 62.517 compute_mat_P_omega_contract 10 8.0 8.958 8.958 62.325 62.325 dbt_total 2336 9.6 0.014 0.014 48.446 48.446 dbt_contract 787 11.0 0.042 0.042 41.472 41.472 dbt_tas_total 1149 12.2 0.207 0.207 40.371 40.371 dbt_tas_multiply 807 12.1 0.002 0.002 39.014 39.014 dbt_tas_dbm 807 14.1 0.003 0.003 32.640 32.640 dbm_multiply 807 16.1 32.632 32.632 32.632 32.632 dbt_tas_mm_1N 524 15.1 0.002 0.002 25.394 25.394 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 23.682 23.682 compute_mat_P_omega_calc_M_occ 250 9.0 8.975 8.975 17.234 17.234 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 6.756 6.756 dbt_tas_mm_2 251 15.0 0.001 0.001 5.759 5.759 dbt_copy 1103 10.7 0.098 0.098 5.598 5.598 compute_QP_energies 1 7.0 0.000 0.000 5.032 5.032 compute_self_energy_cubic_gw 1 8.0 0.052 0.052 5.030 5.030 contract_cubic_gw 21 9.0 0.000 0.000 4.048 4.048 dbt_tas_reserve_blocks_index 3261 14.3 0.486 0.486 3.675 3.675 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.512 3.512 dbm_reserve_blocks 3628 15.3 3.259 3.259 3.259 3.259 scf_env_do_scf 1 3.0 0.000 0.000 2.897 2.897 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.897 2.897 dbt_reserve_blocks_index 2280 13.1 0.056 0.056 2.797 2.797 dbt_reserve_blocks_index_array 2222 12.2 0.009 0.009 2.740 2.740 dbt_crop 1042 12.0 1.408 1.408 2.339 2.339 convert_to_new_pgrid 2421 14.1 0.124 0.124 2.254 2.254 dbt_tas_copy 574 11.4 1.273 1.273 2.198 2.198 dbm_copy 1614 15.1 2.130 2.130 2.130 2.130 dbt_tas_reshape 367 15.0 0.007 0.007 2.053 2.053 dbt_reshape 278 11.9 1.019 1.019 1.877 1.877 get_2c_integrals 1 6.0 0.000 0.000 1.585 1.585 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.032 33.321 33.333 qs_energies 1 2.0 0.000 0.000 33.157 33.159 mp2_main 1 3.0 0.000 0.000 32.151 32.153 mp2_gpw_main 1 4.0 0.000 0.001 32.015 32.017 rpa_ri_compute_en 1 5.0 0.000 0.000 30.707 30.709 rpa_num_int 1 6.0 0.000 0.002 30.707 30.709 dbt_total 2336 9.6 0.015 0.015 27.311 27.322 compute_mat_P_omega 1 7.0 0.001 0.006 26.212 26.214 compute_mat_P_omega_contract 10 8.0 0.412 0.425 26.079 26.082 dbt_contract 787 11.0 0.038 0.039 20.236 20.241 dbt_tas_total 1149 12.2 0.081 0.086 17.915 17.916 dbt_tas_multiply 807 12.1 0.002 0.002 17.838 17.840 dbt_tas_dbm 807 14.1 0.003 0.003 12.794 12.800 dbm_multiply 807 16.1 10.234 10.920 10.234 10.920 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.114 8.115 compute_mat_P_omega_calc_M_occ 250 9.0 0.396 0.409 7.622 7.623 dbt_copy 1149 10.8 0.013 0.014 6.105 6.293 dbt_tas_mm_2 251 15.0 0.001 0.001 6.183 6.193 dbt_reshape 1136 11.8 2.644 2.821 5.806 5.987 mp_sync 8688 11.6 4.493 5.743 4.493 5.743 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.191 5.192 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.497 4.988 compute_QP_energies 1 7.0 0.000 0.000 2.807 2.809 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 2.807 2.807 mp_waitall_2 3812 15.3 2.348 2.528 2.348 2.528 dbt_communicate_buffer 1136 12.8 0.052 0.062 2.330 2.432 contract_cubic_gw 21 9.0 0.000 0.000 2.194 2.195 dbt_reserve_blocks_index 2887 13.1 0.069 0.075 1.741 1.912 dbt_reserve_blocks_index_array 2829 12.2 0.010 0.011 1.733 1.903 dbt_tas_reserve_blocks_index 3347 14.5 0.432 0.471 1.723 1.887 dbt_crop 1042 12.0 0.914 1.007 1.458 1.607 dbm_reserve_blocks 3752 15.4 1.384 1.529 1.384 1.529 mp2_ri_gpw_compute_in 1 5.0 0.003 0.004 1.306 1.306 dbt_tas_replicate 405 14.1 0.538 0.697 1.104 1.167 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.071 1.073 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.006 1.009 parallel_gemm_fm 105 8.4 0.000 0.000 0.973 0.988 parallel_gemm_fm_cosma 105 9.4 0.973 0.988 0.973 0.988 scf_env_do_scf 1 3.0 0.000 0.000 0.958 0.958 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 0.957 0.958 convert_to_new_pgrid 2421 14.1 0.027 0.029 0.803 0.908 dbm_copy 1608 15.1 0.771 0.879 0.771 0.879 mp_max_i 2009 9.8 0.606 0.796 0.606 0.796 compute_W_cubic_GW 10 7.0 0.001 0.001 0.749 0.757 dbt_tas_create_split 2550 15.2 0.004 0.005 0.651 0.690 dbt_tas_create_split_rows_or_c 2550 16.2 0.012 0.013 0.647 0.686 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=21.343999999999994, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=32.632, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.975, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.958, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.259, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.13, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.019, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=10.638999999999996, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.234, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.396, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.412, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.384, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.771, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.644, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.348, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=4.493, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.023 0.023 261.804 261.804 qs_forces 1 2.0 0.000 0.000 261.262 261.262 rebuild_ks_matrix 7 6.6 0.000 0.000 259.788 259.788 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 259.788 259.788 hfx_ks_matrix 7 8.6 0.000 0.000 257.635 257.635 hfx_ri_update_ks 7 9.6 0.000 0.000 217.105 217.105 hfx_ri_update_ks_Pmat 7 10.6 31.730 31.730 217.101 217.101 dbt_total 849 11.0 0.006 0.006 202.959 202.959 qs_energies 1 3.0 0.000 0.000 193.094 193.094 scf_env_do_scf 1 4.0 0.000 0.000 192.778 192.778 qs_ks_update_qs_env 8 6.0 0.000 0.000 191.670 191.670 dbt_contract 207 12.4 0.112 0.112 183.429 183.429 dbt_tas_total 369 13.4 1.716 1.716 182.566 182.566 dbt_tas_multiply 216 13.5 0.001 0.001 178.161 178.161 dbt_tas_dbm 216 15.5 0.001 0.001 166.194 166.194 dbm_multiply 216 17.5 166.191 166.191 166.191 166.191 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 161.013 161.013 dbt_tas_mm_2 91 16.5 0.001 0.001 152.433 152.433 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 120.802 120.802 init_scf_loop 2 5.0 0.000 0.000 71.974 71.974 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 68.121 68.121 hfx_ri_update_forces 1 7.0 1.650 1.650 40.527 40.527 hfx_ri_forces_Pmat_3c 1 8.0 4.776 4.776 23.218 23.218 dbt_copy 423 11.8 0.039 0.039 15.118 15.118 precalc_derivatives 1 8.0 2.246 2.246 13.225 13.225 dbt_reshape 132 13.2 6.504 6.504 10.562 10.562 dbt_tas_mm_3T 77 17.1 0.000 0.000 9.516 9.516 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 9.424 9.424 dbt_tas_reserve_blocks_index 1323 15.4 1.025 1.025 7.747 7.747 build_3c_derivatives 3 9.0 2.517 2.517 7.419 7.419 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 7.182 7.182 dbm_reserve_blocks 1491 16.2 6.962 6.962 6.962 6.962 dbt_reserve_blocks_index 846 14.4 0.095 0.095 6.099 6.099 dbt_reserve_blocks_index_array 816 13.5 0.007 0.007 5.994 5.994 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.033 48.944 48.956 qs_forces 1 2.0 0.000 0.000 48.751 48.751 rebuild_ks_matrix 7 6.6 0.000 0.000 47.948 47.949 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 47.948 47.949 hfx_ks_matrix 7 8.6 0.000 0.000 46.929 46.937 dbt_total 849 11.0 0.006 0.006 41.879 41.891 dbt_contract 207 12.4 0.025 0.028 32.215 32.241 dbt_tas_total 369 13.4 0.088 0.263 28.750 28.764 dbt_tas_multiply 216 13.5 0.001 0.001 28.206 28.221 hfx_ri_update_ks 7 9.6 0.000 0.000 27.365 27.366 hfx_ri_update_ks_Pmat 7 10.6 1.240 1.293 27.362 27.363 qs_energies 1 3.0 0.000 0.000 25.375 25.375 scf_env_do_scf 1 4.0 0.000 0.001 25.228 25.228 qs_ks_update_qs_env 8 6.0 0.000 0.000 24.585 24.585 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 23.365 23.365 dbt_tas_dbm 216 15.5 0.001 0.001 21.315 21.328 dbm_multiply 216 17.5 19.419 20.247 19.419 20.247 hfx_ri_update_forces 1 7.0 0.060 0.062 19.563 19.571 hfx_ri_forces_Pmat_3c 1 8.0 0.165 0.175 14.468 14.480 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 13.630 13.630 hfx_ri_update_ks_Pmat_KS 63 11.6 0.000 0.001 11.866 11.878 init_scf_loop 2 5.0 0.000 0.000 11.596 11.597 dbt_tas_mm_2 91 16.5 0.001 0.001 9.565 9.578 dbt_copy 539 12.5 0.012 0.013 8.527 8.688 dbt_reshape 393 13.9 3.499 3.624 6.484 6.630 dbt_tas_mm_3T 77 17.1 0.000 0.000 5.905 6.358 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 5.838 5.851 mp_sync 2901 12.8 4.415 5.482 4.415 5.482 dbt_tas_mm_3N 37 15.4 0.000 0.000 4.313 4.437 precalc_derivatives 1 8.0 0.090 0.097 3.863 3.863 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.344 3.345 dbt_tas_reserve_blocks_index 1471 15.8 0.894 0.919 3.119 3.305 mp_waitall_2 1318 16.2 2.807 2.940 2.807 2.940 dbt_reserve_blocks_index 1107 14.8 0.110 0.114 2.559 2.695 dbt_reserve_blocks_index_array 1077 13.9 0.006 0.006 2.532 2.665 dbm_reserve_blocks 1641 16.6 2.443 2.609 2.443 2.609 dbt_crop 372 13.7 1.705 1.783 2.468 2.594 build_3c_derivatives 3 9.0 0.224 0.241 2.185 2.193 dbt_communicate_buffer 393 14.9 0.014 0.015 1.936 2.042 dbt_tas_replicate 170 15.1 0.676 0.708 1.696 1.743 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.730 1.733 convert_to_new_pgrid 648 15.5 0.036 0.061 1.353 1.680 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.608 1.614 dbm_copy 452 16.3 1.182 1.510 1.182 1.510 dbt_tas_copy 146 12.6 0.684 0.714 1.296 1.389 mp_sum_l 6385 13.7 1.035 1.138 1.035 1.138 dbt_tas_communicate_buffer 370 16.3 0.013 0.013 0.997 1.040 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=45.64099999999999, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=166.191, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=31.73, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=6.962, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=6.504, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.776, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=14.956000000000003, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=19.419, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.24, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.443, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.499, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.165, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=4.415, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.807, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.017 183.274 183.274 qs_energies 1 2.0 0.000 0.000 183.092 183.092 mp2_main 1 3.0 0.000 0.000 178.367 178.367 mp2_gpw_main 1 4.0 0.001 0.001 177.986 177.986 mp2_ri_gpw_compute_in 1 5.0 0.382 0.382 133.596 133.596 mp2_ri_gpw_compute_in_loop 1 6.0 0.009 0.009 124.354 124.354 mp2_eri_3c_integrate_gpw 2656 7.0 0.012 0.012 96.557 96.557 integrate_v_rspace 2666 8.0 0.627 0.627 83.628 83.628 grid_integrate_task_list 2666 9.0 80.858 80.858 80.858 80.858 mp2_ri_gpw_compute_en 1 5.0 0.084 0.084 44.367 44.367 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.432 9.432 42.496 42.496 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.145 2.145 25.747 25.747 local_gemm 2080 8.0 23.602 23.602 23.602 23.602 dbcsr_multiply_generic 5322 8.0 0.176 0.176 21.580 21.580 ao_to_mo_and_store_B_mult_1 2656 7.0 0.009 0.009 21.558 21.558 fft_wrap_pw1pw2 53228 10.4 0.109 0.109 12.007 12.007 calculate_wavefunction 2656 8.0 7.897 7.897 11.653 11.653 multiply_cannon 5322 9.0 0.440 0.440 11.058 11.058 multiply_cannon_loop 5322 10.0 0.180 0.180 9.702 9.702 get_2c_integrals 1 6.0 0.000 0.000 8.859 8.859 fft_wrap_pw1pw2_20 21271 11.4 1.057 1.057 8.765 8.765 make_m2s 10644 9.0 0.060 0.060 8.281 8.281 compute_2c_integrals 1 7.0 0.006 0.006 8.195 8.195 compute_2c_integrals_loop_lm 1 8.0 0.011 0.011 8.182 8.182 multiply_cannon_multrec 5322 11.0 8.134 8.134 8.173 8.173 mp2_eri_2c_integrate_gpw 1 9.0 0.868 0.868 8.171 8.171 make_images 10644 10.0 3.165 3.165 7.949 7.949 fft3d_s 53229 12.4 6.525 6.525 6.557 6.557 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.256 2.256 6.122 6.122 mp2_ri_gpw_compute_en_ener 2080 7.0 5.483 5.483 5.483 5.483 copy_dbcsr_to_fm 2679 8.0 0.025 0.025 4.317 4.317 scf_env_do_scf 1 3.0 0.000 0.000 4.300 4.300 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.300 4.300 potential_pw2rs 5322 10.0 0.144 0.144 4.217 4.217 collocate_single_gaussian 2656 10.0 0.138 0.138 3.740 3.740 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.035 38.071 38.083 qs_energies 1 2.0 0.000 0.000 37.597 37.598 mp2_main 1 3.0 0.000 0.001 35.614 35.614 mp2_gpw_main 1 4.0 0.001 0.002 35.511 35.511 mp2_ri_gpw_compute_in 1 5.0 0.043 0.044 18.415 18.755 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 17.145 17.488 mp2_ri_gpw_compute_en 1 5.0 0.268 0.281 17.035 17.321 mp2_ri_gpw_compute_en_RI_loop 1 6.0 2.281 2.459 15.813 15.828 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 15.094 15.379 integrate_v_rspace 93 8.1 0.101 0.115 14.975 15.264 grid_integrate_task_list 93 9.1 14.690 14.970 14.690 14.970 mp2_ri_gpw_compute_en_expansio 65 7.0 0.097 0.111 11.030 11.159 local_gemm 65 8.0 10.933 11.062 10.933 11.062 mp2_ri_gpw_compute_en_comm 17 7.0 0.097 0.122 2.177 2.886 mp_sendrecv_dm3 1054 8.0 1.599 2.412 1.599 2.412 dbcsr_multiply_generic 176 8.0 0.007 0.008 1.739 1.874 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.724 1.859 scf_env_do_scf 1 3.0 0.000 0.000 1.845 1.846 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 1.845 1.846 get_2c_integrals 1 6.0 0.004 0.013 1.209 1.223 multiply_cannon 176 9.0 0.017 0.018 1.044 1.109 multiply_cannon_loop 176 10.0 0.002 0.002 0.986 1.049 compute_2c_integrals 1 7.0 0.003 0.003 0.895 0.906 multiply_cannon_multrec 246 11.0 0.856 0.895 0.860 0.900 qs_scf_new_mos 10 5.0 0.000 0.000 0.874 0.877 eigensolver 11 5.8 0.001 0.001 0.831 0.833 compute_2c_integrals_loop_lm 1 8.0 0.001 0.002 0.802 0.826 mp2_eri_2c_integrate_gpw 1 9.0 0.220 0.232 0.801 0.826 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=53.351, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=80.858, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=23.602, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.432, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.134, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=7.897, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.712, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.69, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=10.933, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=2.281, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.856, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.599, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.096 0.096 131.144 131.144 qs_energies 1 2.0 0.000 0.000 129.802 129.802 scf_env_do_scf 1 3.0 0.000 0.000 122.474 122.474 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 122.474 122.474 qs_ks_update_qs_env 15 5.0 0.000 0.000 54.983 54.983 rebuild_ks_matrix 15 6.0 0.000 0.000 54.771 54.771 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 54.771 54.771 qs_scf_new_mos 15 5.0 0.000 0.000 43.229 43.229 qs_vxc_create 15 8.0 0.018 0.018 38.274 38.274 eigensolver 15 6.0 0.002 0.002 35.395 35.395 calculate_dispersion_nonloc 15 9.0 6.658 6.658 32.937 32.937 fft_wrap_pw1pw2 1086 10.0 0.013 0.013 28.482 28.482 cp_fm_diag_elpa 15 7.0 0.000 0.000 22.213 22.213 cp_fm_diag_elpa_base 15 8.0 19.622 19.622 22.213 22.213 qs_rho_update_rho_low 16 5.0 0.000 0.000 20.773 20.773 calculate_rho_elec 16 6.0 0.215 0.215 20.773 20.773 fft_wrap_pw1pw2_150 765 11.0 6.762 6.762 20.590 20.590 grid_collocate_task_list 16 7.0 19.129 19.129 19.129 19.129 sum_up_and_integrate 15 8.0 0.000 0.000 15.412 15.412 integrate_v_rspace 15 9.0 0.023 0.023 15.352 15.352 grid_integrate_task_list 15 10.0 14.513 14.513 14.513 14.513 fft3d_s 1087 12.0 10.776 10.776 10.784 10.784 cp_fm_cholesky_restore 45 7.0 10.774 10.774 10.774 10.774 fft_wrap_pw1pw2_200 197 11.3 1.752 1.752 7.668 7.668 pw_scatter_s 585 12.1 7.329 7.329 7.329 7.329 dbcsr_complete_redistribute 46 8.3 1.975 1.975 5.482 5.482 copy_dbcsr_to_fm 16 5.9 0.000 0.000 5.342 5.342 xc_vxc_pw_create 15 9.0 0.205 0.205 5.319 5.319 cp_fm_upper_to_full 30 8.0 4.997 4.997 4.997 4.997 gspace_mixing 14 5.0 0.170 0.170 4.668 4.668 vdW_energy 15 10.0 4.364 4.364 4.364 4.364 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.198 4.198 broyden_mixing 14 6.0 4.008 4.008 4.009 4.009 xc_pw_derive 90 11.0 0.001 0.001 3.838 3.838 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 3.019 3.019 calculate_dm_sparse 15 6.0 0.016 0.016 2.828 2.828 cp_dbcsr_plus_fm_fm_t_native 15 7.0 0.000 0.000 2.748 2.748 xc_pw_divergence 15 10.0 0.000 0.000 2.718 2.718 init_scf_run 1 3.0 0.000 0.000 2.667 2.667 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.034 60.955 60.968 qs_energies 1 2.0 0.000 0.000 60.374 60.381 scf_env_do_scf 1 3.0 0.000 0.001 56.160 56.161 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 56.160 56.161 qs_ks_update_qs_env 15 5.0 0.000 0.000 24.411 24.422 rebuild_ks_matrix 15 6.0 0.000 0.000 24.378 24.390 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 24.378 24.390 qs_rho_update_rho_low 16 5.0 0.000 0.000 19.361 19.367 calculate_rho_elec 16 6.0 0.007 0.010 19.361 19.367 grid_collocate_task_list 16 7.0 17.962 18.270 17.962 18.270 sum_up_and_integrate 15 8.0 0.000 0.000 14.881 14.924 integrate_v_rspace 15 9.0 0.001 0.001 14.873 14.920 grid_integrate_task_list 15 10.0 14.074 14.237 14.074 14.237 qs_scf_new_mos 15 5.0 0.000 0.000 12.913 12.938 eigensolver 15 6.0 0.001 0.002 11.881 11.896 qs_vxc_create 15 8.0 0.001 0.005 9.105 9.117 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.376 8.381 cp_fm_diag_elpa_base 15 8.0 8.237 8.262 8.368 8.370 calculate_dispersion_nonloc 15 9.0 0.905 0.925 7.340 7.360 fft_wrap_pw1pw2 1086 10.0 0.020 0.022 6.748 6.804 fft3d_ps 1086 12.0 2.578 2.746 4.948 5.063 fft_wrap_pw1pw2_150 765 11.0 0.125 0.141 4.375 4.400 cp_fm_cholesky_restore 45 7.0 3.354 3.396 3.354 3.396 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.666 2.666 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.375 2.569 fft_wrap_pw1pw2_200 197 11.3 0.094 0.102 2.248 2.318 mp_alltoall_z22v 1086 14.0 1.774 2.105 1.774 2.105 xc_vxc_pw_create 15 9.0 0.015 0.021 1.764 1.789 build_core_ppnl 1 5.0 1.478 1.604 1.478 1.604 density_rs2pw 16 7.0 0.001 0.001 1.248 1.440 vdW_energy 15 10.0 1.297 1.368 1.297 1.368 x_to_yz 585 13.1 0.344 0.371 1.227 1.352 transfer_rs2pw 82 8.0 0.001 0.001 0.996 1.343 init_scf_run 1 3.0 0.000 0.001 1.315 1.316 yz_to_x 501 12.9 0.229 0.279 1.120 1.306 mp_waitany 520 11.3 0.918 1.255 0.918 1.255 xc_pw_derive 90 11.0 0.002 0.002 1.164 1.240 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=56.33, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=19.622, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.129, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.513, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.776, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.774, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=14.75, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.237, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=17.962, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.074, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.354, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.578, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.128 0.128 266.906 266.906 qs_energies 1 2.0 0.000 0.000 266.715 266.715 ls_scf 1 3.0 0.000 0.000 265.471 265.471 ls_scf_main 1 4.0 0.003 0.003 256.003 256.003 density_matrix_trs4 11 5.0 0.014 0.014 160.239 160.239 ls_scf_dm_to_ks 11 5.0 0.000 0.000 90.335 90.335 matrix_ls_to_qs 11 6.0 0.000 0.000 87.019 87.019 dbcsr_multiply_generic 185 6.1 0.810 0.810 77.388 77.388 arnoldi_extremal 12 6.1 0.000 0.000 70.755 70.755 arnoldi_normal_ev 12 7.1 0.031 0.031 70.755 70.755 build_subspace 23 8.1 0.074 0.074 69.447 69.447 dbcsr_matrix_vector_mult 652 9.0 0.198 0.198 69.230 69.230 dbcsr_matrix_vector_mult_local 652 10.0 67.749 67.749 67.760 67.760 multiply_cannon 185 7.1 0.436 0.436 47.043 47.043 dbcsr_copy_into_existing 11 7.0 46.992 46.992 46.992 46.992 dbcsr_complete_redistribute 23 7.5 31.819 31.819 43.894 43.894 matrix_decluster 11 7.0 0.000 0.000 40.026 40.026 multiply_cannon_loop 185 8.1 0.264 0.264 34.325 34.325 make_m2s 370 7.1 0.038 0.038 25.502 25.502 multiply_cannon_multrec 185 9.1 24.978 24.978 25.086 25.086 make_images 370 8.1 10.919 10.919 23.716 23.716 dbcsr_finalize 646 7.5 0.441 0.441 16.583 16.583 dbcsr_merge_all 597 8.5 2.788 2.788 15.112 15.112 setup_rec_index_2d 370 8.1 12.033 12.033 12.033 12.033 tree_to_linear_d 110 9.4 10.905 10.905 10.905 10.905 dbcsr_sort_indices 805 10.1 10.349 10.349 10.349 10.349 calculate_norms 370 9.1 8.974 8.974 8.974 8.974 quick_finalize 395 10.0 0.365 0.365 8.932 8.932 ls_scf_init_scf 1 4.0 0.000 0.000 8.690 8.690 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.357 8.357 dbcsr_special_finalize 370 9.1 0.002 0.002 8.259 8.259 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.686 7.686 matrix_qs_to_ls 12 5.1 0.000 0.000 5.623 5.623 matrix_cluster 12 6.1 0.000 0.000 5.623 5.623 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.030 70.150 70.162 qs_energies 1 2.0 0.000 0.000 70.026 70.026 ls_scf 1 3.0 0.000 0.000 69.977 69.978 ls_scf_main 1 4.0 0.001 0.010 67.313 67.314 density_matrix_trs4 11 5.0 0.007 0.018 64.774 64.837 dbcsr_multiply_generic 185 6.1 0.059 0.076 61.529 61.743 multiply_cannon 185 7.1 0.036 0.039 50.649 51.024 multiply_cannon_loop 185 8.1 0.119 0.127 48.060 48.693 multiply_cannon_multrec 1480 9.1 29.128 31.092 29.389 31.358 mp_waitall_1 11936 10.3 16.494 18.471 16.494 18.471 multiply_cannon_metrocomm3 1480 9.1 0.016 0.017 12.555 14.651 make_m2s 370 7.1 0.035 0.037 7.071 7.154 make_images 370 8.1 0.636 0.666 6.937 7.019 calculate_norms 2960 9.1 4.894 5.247 4.894 5.247 mp_sum_l 1199 5.3 2.931 3.554 2.931 3.554 make_images_data 370 9.1 0.011 0.012 3.163 3.447 hybrid_alltoall_any 393 9.9 0.236 1.240 2.751 2.978 arnoldi_extremal 12 6.1 0.000 0.001 2.511 2.518 arnoldi_normal_ev 12 7.1 0.002 0.008 2.511 2.517 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 2.055 2.494 build_subspace 23 8.1 0.021 0.025 2.419 2.421 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.183 2.235 dbcsr_matrix_vector_mult 652 9.0 0.013 0.050 2.048 2.130 dbcsr_complete_redistribute 23 7.5 1.204 1.264 2.001 2.081 ls_scf_init_scf 1 4.0 0.000 0.000 2.041 2.041 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.014 2.020 matrix_ls_to_qs 11 6.0 0.000 0.000 1.936 2.018 make_images_pack 370 9.1 1.697 1.882 1.701 1.887 matrix_decluster 11 7.0 0.000 0.000 1.800 1.880 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.002 1.836 1.837 dbcsr_matrix_vector_mult_local 652 10.0 1.689 1.762 1.691 1.764 multiply_cannon_metrocomm1 1480 9.1 0.006 0.007 1.065 1.605 buffer_matrices_ensure_size 370 8.1 1.378 1.446 1.378 1.446 dbcsr_finalize 646 7.5 0.008 0.008 1.304 1.431 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=74.36100000000005, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=67.749, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=46.992, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=31.819, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=24.978, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=12.033, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=8.974, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=12.113000000000007, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.689, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.204, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=29.128, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.894, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.697, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=16.494, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.931, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.020 81.026 81.026 lib_test 1 2.0 0.000 0.000 80.925 80.925 dbcsr_run_tests 3 3.0 0.283 0.283 80.925 80.925 test_multiplies_multiproc 3 4.0 0.001 0.001 63.923 63.923 dbcsr_redistribute 9 5.0 38.694 38.694 40.614 40.614 dbcsr_multiply_generic 9 5.0 0.002 0.002 21.578 21.578 dbcsr_make_random_matrix 9 4.0 13.166 13.166 16.601 16.601 multiply_cannon 9 6.0 0.003 0.003 15.239 15.239 multiply_cannon_loop 9 7.0 0.028 0.028 14.833 14.833 multiply_cannon_multrec 9 8.0 14.804 14.804 14.806 14.806 dbcsr_finalize 27 5.7 0.031 0.031 6.634 6.634 dbcsr_merge_all 18 6.5 2.540 2.540 5.725 5.725 dbcsr_data_release 975 7.6 3.339 3.339 3.339 3.339 make_m2s 18 6.0 0.001 0.001 2.356 2.356 make_images 18 7.0 0.874 0.874 2.214 2.214 tree_to_linear_d 9 7.0 2.209 2.209 2.209 2.209 dbcsr_destroy 93 5.8 0.000 0.000 1.624 1.624 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.019 23.025 23.031 lib_test 1 2.0 0.000 0.000 22.988 23.008 dbcsr_run_tests 3 3.0 0.000 0.001 22.983 23.003 test_multiplies_multiproc 3 4.0 0.000 0.003 22.065 22.134 dbcsr_multiply_generic 9 5.0 0.001 0.001 20.294 20.394 multiply_cannon 9 6.0 0.002 0.002 17.718 18.066 multiply_cannon_loop 9 7.0 0.002 0.003 17.384 17.716 multiply_cannon_multrec 72 8.0 13.034 13.420 13.035 13.422 mp_waitall_1 576 9.2 4.771 5.388 4.771 5.388 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 4.242 4.853 mp_sum_l 470 2.5 1.095 1.524 1.095 1.524 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 1.034 1.461 make_m2s 18 6.0 0.001 0.001 0.893 0.932 make_images 18 7.0 0.022 0.023 0.890 0.928 dbcsr_make_random_matrix 9 4.0 0.706 0.726 0.877 0.914 dbcsr_data_release 444 7.6 0.611 0.745 0.611 0.745 dbcsr_finalize 27 5.7 0.000 0.000 0.626 0.715 dbcsr_redistribute 9 5.0 0.283 0.333 0.667 0.704 dbcsr_destroy 111 5.9 0.000 0.001 0.543 0.678 dbcsr_merge_all 18 6.5 0.102 0.122 0.555 0.640 make_images_data 18 8.0 0.001 0.001 0.476 0.546 hybrid_alltoall_any 18 9.0 0.044 0.200 0.408 0.473 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=8.48299999999999, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=38.694, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=14.804, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=13.166, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=3.339, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.54, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.4229999999999983, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.283, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=13.034, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.706, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.611, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.102, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=1.095, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.771, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.065 0.065 144.663 144.663 qs_mol_dyn_low 1 2.0 0.005 0.005 142.884 142.884 velocity_verlet 5 3.0 0.004 0.004 115.756 115.756 qmmm_el_coupling 6 3.8 0.000 0.000 88.844 88.844 qmmm_elec_with_gaussian 6 4.8 0.027 0.027 88.839 88.839 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 87.988 87.988 qmmm_elec_gaussian_low_G 6 6.8 87.072 87.072 87.072 87.072 qs_forces 6 3.8 0.001 0.001 44.714 44.714 qs_energies 6 4.8 0.000 0.000 39.690 39.690 scf_env_do_scf 6 5.8 0.001 0.001 36.903 36.903 scf_env_do_scf_inner_loop 39 6.8 0.005 0.005 32.030 32.030 rebuild_ks_matrix 45 8.4 0.000 0.000 31.472 31.472 qs_ks_build_kohn_sham_matrix 45 9.4 0.006 0.006 31.471 31.471 qs_ks_update_qs_env 45 7.8 0.000 0.000 26.991 26.991 fft_wrap_pw1pw2 801 12.6 0.013 0.013 23.304 23.304 fft_wrap_pw1pw2_150 507 14.2 6.123 6.123 22.825 22.825 qs_vxc_create 45 10.4 0.001 0.001 18.536 18.536 xc_vxc_pw_create 45 11.4 0.799 0.799 18.536 18.536 xc_pw_derive 270 13.4 0.002 0.002 13.030 13.030 xc_pw_divergence 45 12.4 0.001 0.001 9.013 9.013 qs_rho_update_rho_low 45 7.9 0.000 0.000 9.008 9.008 calculate_rho_elec 45 8.9 0.639 0.639 9.008 9.008 xc_rho_set_and_dset_create 45 12.4 1.186 1.186 8.638 8.638 fft3d_s 802 14.6 8.556 8.556 8.565 8.565 pw_scatter_s 429 14.8 6.431 6.431 6.431 6.431 qmmm_forces 6 3.8 0.002 0.002 5.926 5.926 qmmm_forces_with_gaussian 6 4.8 0.035 0.035 5.536 5.536 pw_integral_ab 2539 7.4 5.128 5.128 5.128 5.128 init_scf_loop 6 6.8 0.000 0.000 4.866 4.866 qs_ks_ddapc 45 10.4 0.001 0.001 4.768 4.768 density_rs2pw 45 9.9 0.002 0.002 4.573 4.573 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.562 4.562 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 4.497 4.497 sum_up_and_integrate 45 10.4 0.001 0.001 4.372 4.372 integrate_v_rspace 45 11.4 0.007 0.007 4.302 4.302 grid_collocate_task_list 45 9.9 3.796 3.796 3.796 3.796 qmmm_forces_gaussian_low_G 6 6.8 3.777 3.777 3.777 3.777 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.041 0.070 71.606 71.624 qs_mol_dyn_low 1 2.0 0.004 0.006 70.303 70.372 qs_forces 6 3.8 0.001 0.001 49.865 49.866 qs_energies 6 4.8 0.000 0.000 47.612 47.613 scf_env_do_scf 6 5.8 0.000 0.001 46.522 46.523 scf_env_do_scf_inner_loop 113 6.2 0.003 0.024 44.511 44.513 rebuild_ks_matrix 119 8.1 0.000 0.000 34.176 34.187 qs_ks_build_kohn_sham_matrix 119 9.1 0.014 0.015 34.176 34.187 qs_ks_update_qs_env 119 7.3 0.001 0.001 32.238 32.248 velocity_verlet 5 3.0 0.002 0.004 30.517 30.520 fft_wrap_pw1pw2 2059 12.4 0.024 0.025 21.266 21.876 fft_wrap_pw1pw2_150 1321 13.9 0.732 0.758 20.553 21.189 qs_vxc_create 119 10.1 0.002 0.002 18.851 18.858 xc_vxc_pw_create 119 11.1 0.365 0.422 18.849 18.856 fft3d_ps 2059 14.4 7.615 7.824 14.011 14.818 xc_pw_derive 714 13.1 0.006 0.007 13.262 13.857 qs_rho_update_rho_low 119 7.3 0.001 0.001 11.987 11.991 calculate_rho_elec 119 8.3 0.055 0.064 11.986 11.990 qmmm_forces 6 3.8 0.004 0.004 10.355 10.355 sum_up_and_integrate 119 10.1 0.002 0.002 10.268 10.301 integrate_v_rspace 119 11.1 0.003 0.003 10.133 10.181 qmmm_forces_with_gaussian 6 4.8 0.008 0.009 9.792 9.988 xc_rho_set_and_dset_create 119 12.1 0.734 0.868 9.347 9.844 xc_pw_divergence 119 12.1 0.003 0.003 8.853 9.408 qmmm_el_coupling 6 3.8 0.000 0.000 8.804 9.015 qmmm_elec_with_gaussian 6 4.8 0.006 0.006 8.802 9.013 density_rs2pw 119 9.3 0.004 0.005 7.231 7.489 potential_pw2rs 119 12.1 0.005 0.008 5.927 5.946 mp_alltoall_z22v 2059 16.4 4.551 5.591 4.551 5.591 grid_collocate_task_list 119 9.3 4.593 4.850 4.593 4.850 pw_restrict_s3 18 5.8 2.538 2.637 4.604 4.792 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.252 4.314 grid_integrate_task_list 119 12.1 3.946 4.098 3.946 4.098 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.845 4.057 pw_prolongate_s3 18 6.8 2.109 2.202 3.845 4.057 yz_to_x 964 15.0 0.768 0.947 3.191 3.978 transfer_pw2rs 500 12.8 0.005 0.005 3.846 3.861 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.536 3.683 qmmm_forces_gaussian_low_G 6 6.8 3.504 3.567 3.504 3.567 transfer_rs2pw 488 10.2 0.006 0.007 3.275 3.526 x_to_yz 1095 15.8 1.043 1.129 3.170 3.309 pw_scatter_p 1095 14.8 3.171 3.218 3.171 3.218 qmmm_elec_gaussian_low_G 6 6.8 2.922 3.049 2.922 3.049 mp_waitany 4028 12.8 2.490 2.807 2.490 2.807 transfer_pw2rs_150 125 13.9 1.055 1.129 2.693 2.730 qs_scf_new_mos 113 7.2 0.000 0.001 2.599 2.606 qs_scf_loop_do_ot 113 8.2 0.001 0.001 2.599 2.606 transfer_rs2pw_150 125 11.2 1.031 1.109 2.299 2.573 qs_ks_ddapc 119 10.1 0.002 0.002 2.505 2.533 ot_scf_mini 113 9.2 0.001 0.002 2.511 2.517 pw_integral_ab 2761 7.7 2.107 2.225 2.371 2.494 mp_sum_dm3 33 5.7 2.242 2.297 2.242 2.297 pw_gather_p 964 14.0 2.247 2.283 2.247 2.283 xc_functional_eval 238 13.1 0.002 0.002 1.760 2.134 dbcsr_multiply_generic 2588 12.3 0.063 0.063 2.057 2.107 init_scf_loop 6 6.8 0.000 0.000 2.007 2.007 pw_derive 1089 13.4 1.941 1.983 1.941 1.983 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.970 1.971 pw_axpy 2529 9.3 1.685 1.744 1.685 1.744 pw_copy 2027 12.4 1.602 1.655 1.602 1.655 pw_poisson_solve 125 9.9 0.002 0.002 1.585 1.624 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=23.78, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=87.072, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.556, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=6.431, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=6.123, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=5.128, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.796, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.777, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=41.635999999999996, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.922, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=0.732, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=2.107, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.593, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.504, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.615, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.551, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.946, yerr=0.0 Summary: Performance test took 34 minutes. Status: OK Removing intermediate container 8a97200dcde9 ---> d667b5cda654 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 5529f14fdada Removing intermediate container 5529f14fdada ---> d63212be80fa Step 42/42 : ENTRYPOINT [] ---> Running in 0ba2e8812551 Removing intermediate container 0ba2e8812551 ---> 99c49f2cad74 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 99c49f2cad74 Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2024-03-27 09:27:28+00:00