StartDate: 2023-10-25 08:04:35+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: ed1be9b29a76eae91bc5e1d9cb35fbcaa846fe6c CommitTime: 2023-10-24 22:32:05 +0200 CommitAuthor: Ole Schütt CommitSubject: Manual: Fix formatting of links #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=ed1be9b29a76eae91bc5e1d9cb35fbcaa846fe6c Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 385.4MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu aece8493d397: Pulling fs layer aece8493d397: Verifying Checksum aece8493d397: Download complete aece8493d397: Pull complete Digest: sha256:2b7412e6465c3c7fc5bb21d3e6f1917c167358449fecac8176c6e496e5c1f05f Status: Downloaded newer image for ubuntu:22.04 ---> e4c58958181a Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> eac9345624d8 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> f0475191a8e6 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> b84f74dbe543 Step 5/42 : RUN mkdir scripts ---> Using cache ---> d019d2365f5a Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 4d3d94d60e50 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 6768631271af Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> 77f450497ed5 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 8fe3abcb7484 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 551f6ee116e1 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> c2cf6987ab79 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> d7ef939e3571 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> f42b7e8ecd30 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> d9186f59f1f5 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 65653c9089b9 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> cc825d89971a Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> dd9944771677 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 85987ee1572b Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> fac508784ba2 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> e3ee738017c7 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 1a64b8c39c18 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> b078dce6440e Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 5d64d4480551 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 0b0feb805573 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> e36e21aa18c5 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 518159dc6dd9 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> c8b5e268927e Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> b4e54b721b2c Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 10298cee80fa Step 30/42 : COPY ./Makefile . ---> Using cache ---> 5015f75d14dd Step 31/42 : COPY ./src ./src ---> Using cache ---> b5269ebe8d4a Step 32/42 : COPY ./exts ./exts ---> Using cache ---> a668716177ab Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> 4296e02059c2 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/" ---> Running in d6d1650da407 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Removing intermediate container d6d1650da407 ---> adb9a2a0f248 Step 35/42 : COPY ./data ./data ---> 1047fe4f6610 Step 36/42 : COPY ./tests ./tests ---> c0b8ae415ee1 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 60d2184b3448 Step 38/42 : COPY ./benchmarks ./benchmarks ---> b12ea380d2cc Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 08beeec7a7dc Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 8b14f0d6aac6 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 75 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 89.029 89.029 qs_mol_dyn_low 1 2.0 0.003 0.003 88.453 88.453 qs_forces 11 3.9 0.001 0.001 88.410 88.410 qs_energies 11 4.9 0.001 0.001 82.389 82.389 scf_env_do_scf 11 5.9 0.002 0.002 71.393 71.393 velocity_verlet 10 3.0 0.002 0.002 58.004 58.004 scf_env_do_scf_inner_loop 108 6.5 0.013 0.013 54.625 54.625 qs_scf_new_mos 108 7.5 0.001 0.001 20.857 20.857 qs_scf_loop_do_ot 108 8.5 0.001 0.001 20.856 20.856 dbcsr_multiply_generic 2286 12.5 0.150 0.150 20.666 20.666 rebuild_ks_matrix 119 8.3 0.001 0.001 20.041 20.041 qs_ks_build_kohn_sham_matrix 119 9.3 0.012 0.012 20.041 20.041 ot_scf_mini 108 9.5 0.003 0.003 19.561 19.561 qs_rho_update_rho_low 119 7.7 0.001 0.001 19.025 19.025 calculate_rho_elec 119 8.7 0.951 0.951 19.024 19.024 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.434 18.434 init_scf_loop 11 6.9 0.000 0.000 16.620 16.620 grid_collocate_task_list 119 9.7 14.731 14.731 14.731 14.731 prepare_preconditioner 11 7.9 0.000 0.000 14.107 14.107 make_preconditioner 11 8.9 0.000 0.000 14.107 14.107 make_full_inverse_cholesky 11 9.9 0.026 0.026 12.867 12.867 sum_up_and_integrate 119 10.3 0.001 0.001 12.811 12.811 integrate_v_rspace 119 11.3 0.094 0.094 12.730 12.730 ot_mini 108 10.5 0.001 0.001 12.545 12.545 make_m2s 4572 13.5 0.046 0.046 11.073 11.073 grid_integrate_task_list 119 12.3 10.547 10.547 10.547 10.547 qs_ot_get_derivative 108 11.5 0.001 0.001 6.668 6.668 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.522 6.522 pw_transfer 1439 11.6 0.060 0.060 6.200 6.200 fft_wrap_pw1pw2 1201 12.6 0.006 0.006 5.986 5.986 ot_diis_step 108 11.5 0.003 0.003 5.874 5.874 dbcsr_make_dense_low 5837 15.5 0.079 0.079 5.771 5.771 multiply_cannon 2286 13.5 0.212 0.212 5.718 5.718 make_dense_data 5837 16.5 5.147 5.147 5.678 5.678 make_images 4572 14.5 2.298 2.298 5.666 5.666 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.398 5.398 apply_single 119 13.6 0.000 0.000 5.398 5.398 multiply_cannon_loop 2286 14.5 0.039 0.039 5.163 5.163 multiply_cannon_multrec 2286 15.5 5.074 5.074 5.124 5.124 dbcsr_make_images_dense 3978 14.8 0.017 0.017 5.079 5.079 fft_wrap_pw1pw2_140 487 13.2 0.490 0.490 4.957 4.957 cp_fm_cholesky_decompose 22 10.9 4.542 4.542 4.542 4.542 cp_fm_cholesky_invert 11 10.9 4.140 4.140 4.140 4.140 init_scf_run 11 5.9 0.003 0.003 3.838 3.838 scf_env_initial_rho_setup 11 6.9 0.002 0.002 3.834 3.834 dbcsr_complete_redistribute 329 12.2 1.892 1.892 3.736 3.736 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.655 3.655 dbcsr_copy 2102 12.0 0.225 0.225 3.511 3.511 qs_ot_get_p 119 10.4 0.001 0.001 3.497 3.497 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.457 3.457 wfi_extrapolate 11 7.9 0.001 0.001 3.356 3.356 density_rs2pw 119 9.7 0.005 0.005 3.342 3.342 dbcsr_copy_into_existing 22 7.9 3.255 3.255 3.255 3.255 qs_create_task_list 11 7.9 0.000 0.000 3.217 3.217 generate_qs_task_list 11 8.9 1.948 1.948 3.217 3.217 fft3d_s 1202 14.6 3.095 3.095 3.100 3.100 copy_dbcsr_to_fm 153 11.3 0.002 0.002 3.020 3.020 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.562 2.562 build_core_hamiltonian_matrix 11 6.9 0.000 0.000 2.459 2.459 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.342 2.342 dbcsr_data_release 279532 16.0 2.302 2.302 2.302 2.302 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.095 2.095 potential_pw2rs 119 12.3 0.046 0.046 2.089 2.089 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 2.038 2.038 pw_poisson_solve 119 10.3 0.332 0.332 2.018 2.018 copy_fm_to_dbcsr 176 11.2 0.001 0.001 2.013 2.013 qs_ot_p2m_diag 50 11.0 0.154 0.154 1.907 1.907 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.034 46.128 46.147 qs_mol_dyn_low 1 2.0 0.003 0.005 45.992 45.997 qs_forces 11 3.9 0.001 0.002 45.946 45.947 qs_energies 11 4.9 0.001 0.001 43.023 43.028 scf_env_do_scf 11 5.9 0.000 0.003 39.454 39.455 scf_env_do_scf_inner_loop 108 6.5 0.003 0.023 36.331 36.332 velocity_verlet 10 3.0 0.001 0.003 27.333 27.335 rebuild_ks_matrix 119 8.3 0.001 0.002 16.369 16.461 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.019 16.368 16.460 qs_ks_update_qs_env 119 7.6 0.001 0.001 14.558 14.639 dbcsr_multiply_generic 2286 12.5 0.075 0.084 14.328 14.504 qs_rho_update_rho_low 119 7.7 0.001 0.001 12.969 12.977 calculate_rho_elec 119 8.7 0.030 0.032 12.969 12.976 sum_up_and_integrate 119 10.3 0.002 0.002 11.973 12.001 integrate_v_rspace 119 11.3 0.004 0.004 11.952 11.982 qs_scf_new_mos 108 7.5 0.001 0.001 11.564 11.700 qs_scf_loop_do_ot 108 8.5 0.001 0.001 11.564 11.699 ot_scf_mini 108 9.5 0.002 0.003 10.928 11.069 multiply_cannon 2286 13.5 0.133 0.146 10.585 10.971 multiply_cannon_loop 2286 14.5 0.100 0.123 10.032 10.444 mp_waitall_1 158411 16.6 8.125 9.123 8.125 9.123 grid_collocate_task_list 119 9.7 8.768 9.105 8.768 9.105 grid_integrate_task_list 119 12.3 7.958 8.116 7.958 8.116 multiply_cannon_metrocomm3 18288 15.5 0.047 0.058 6.263 7.058 ot_mini 108 10.5 0.001 0.001 6.372 6.523 density_rs2pw 119 9.7 0.005 0.006 3.764 4.161 pw_transfer 1439 11.6 0.063 0.071 3.984 4.154 fft_wrap_pw1pw2 1201 12.6 0.008 0.010 3.854 4.012 multiply_cannon_multrec 18288 15.5 3.343 3.783 3.354 3.795 fft_wrap_pw1pw2_140 487 13.2 0.567 0.726 3.252 3.547 potential_pw2rs 119 12.3 0.006 0.007 3.484 3.511 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.291 3.405 apply_single 119 13.6 0.000 0.000 3.290 3.405 qs_ot_get_derivative 108 11.5 0.001 0.001 3.168 3.308 ot_diis_step 108 11.5 0.003 0.003 3.183 3.184 init_scf_loop 11 6.9 0.000 0.000 3.106 3.107 fft3d_ps 1201 14.6 1.274 1.579 2.531 2.714 make_m2s 4572 13.5 0.046 0.050 2.565 2.643 init_scf_run 11 5.9 0.000 0.006 2.510 2.510 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.510 2.510 transfer_pw2rs 487 13.2 0.005 0.008 2.446 2.451 transfer_rs2pw 487 10.6 0.006 0.007 1.899 2.365 make_images 4572 14.5 0.114 0.122 2.243 2.317 wfi_extrapolate 11 7.9 0.001 0.001 2.285 2.285 mp_waitany 9880 13.7 1.497 2.130 1.497 2.130 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.943 1.954 qs_ot_get_p 119 10.4 0.001 0.001 1.518 1.766 transfer_rs2pw_140 130 11.5 0.193 0.249 1.018 1.482 make_images_data 4572 15.5 0.037 0.044 1.265 1.448 mp_alltoall_z22v 1201 16.6 0.990 1.370 0.990 1.370 transfer_pw2rs_140 130 13.9 0.369 0.485 1.287 1.331 mp_sum_l 11298 13.2 0.735 1.275 0.735 1.275 prepare_preconditioner 11 7.9 0.000 0.000 1.221 1.240 make_preconditioner 11 8.9 0.000 0.000 1.221 1.240 hybrid_alltoall_any 4725 16.4 0.067 0.233 1.071 1.231 mp_sum_d 4135 12.0 0.668 1.185 0.668 1.185 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.106 1.126 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.987 1.053 transfer_pw2rs_50 119 14.3 0.335 0.404 0.898 1.010 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.919 0.991 mp_alltoall_d11v 2130 13.8 0.789 0.987 0.789 0.987 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.973 0.975 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.873 0.952 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=48.988, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=14.731, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.547, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.147, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.074, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.542, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=16.437, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=8.768, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=7.958, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.343, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.497, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.125, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 111.070 111.070 qs_mol_dyn_low 1 2.0 0.003 0.003 110.415 110.415 qs_forces 11 3.9 0.001 0.001 110.373 110.373 qs_energies 11 4.9 0.001 0.001 103.021 103.021 scf_env_do_scf 11 5.9 0.002 0.002 90.306 90.306 scf_env_do_scf_inner_loop 96 6.5 0.011 0.011 72.425 72.425 velocity_verlet 10 3.0 0.002 0.002 70.926 70.926 rebuild_ks_matrix 107 8.3 0.001 0.001 32.954 32.954 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 32.953 32.953 qs_rho_update_rho_low 107 7.7 0.001 0.001 31.412 31.412 calculate_rho_elec 107 8.7 0.857 0.857 31.411 31.411 qs_ks_update_qs_env 107 7.6 0.001 0.001 29.780 29.780 grid_collocate_task_list 107 9.7 27.314 27.314 27.314 27.314 sum_up_and_integrate 107 10.3 0.001 0.001 26.139 26.139 integrate_v_rspace 107 11.3 0.084 0.084 26.077 26.077 grid_integrate_task_list 107 12.3 24.429 24.429 24.429 24.429 qs_scf_new_mos 96 7.5 0.000 0.000 17.962 17.962 qs_scf_loop_do_ot 96 8.5 0.001 0.001 17.961 17.961 dbcsr_multiply_generic 1966 12.4 0.140 0.140 17.961 17.961 init_scf_loop 11 6.9 0.000 0.000 17.760 17.760 ot_scf_mini 96 9.5 0.002 0.002 16.787 16.787 prepare_preconditioner 11 7.9 0.000 0.000 13.861 13.861 make_preconditioner 11 8.9 0.000 0.000 13.860 13.860 make_full_inverse_cholesky 11 9.9 0.018 0.018 12.675 12.675 ot_mini 96 10.5 0.001 0.001 10.735 10.735 make_m2s 3932 13.4 0.040 0.040 9.539 9.539 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.939 6.939 qs_ot_get_derivative 96 11.5 0.001 0.001 5.744 5.744 pw_transfer 1295 11.6 0.055 0.055 5.726 5.726 fft_wrap_pw1pw2 1081 12.6 0.005 0.005 5.440 5.440 init_scf_run 11 5.9 0.003 0.003 5.192 5.192 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.189 5.189 multiply_cannon 1966 13.4 0.163 0.163 5.163 5.163 dbcsr_make_dense_low 4961 15.5 0.069 0.069 4.993 4.993 ot_diis_step 96 11.5 0.003 0.003 4.989 4.989 make_dense_data 4961 16.5 4.470 4.470 4.911 4.911 make_images 3932 14.4 2.017 2.017 4.865 4.865 multiply_cannon_loop 1966 14.4 0.030 0.030 4.751 4.751 multiply_cannon_multrec 1966 15.4 4.675 4.675 4.721 4.721 fft_wrap_pw1pw2_140 439 13.2 0.477 0.477 4.720 4.720 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.593 4.593 apply_single 107 13.6 0.000 0.000 4.593 4.593 wfi_extrapolate 11 7.9 0.001 0.001 4.592 4.592 cp_fm_cholesky_decompose 22 10.9 4.480 4.480 4.480 4.480 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.364 4.364 cp_fm_cholesky_invert 11 10.9 4.127 4.127 4.127 4.127 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.034 4.034 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.015 4.015 qs_create_task_list 11 7.9 0.000 0.000 3.655 3.655 generate_qs_task_list 11 8.9 2.380 2.380 3.655 3.655 dbcsr_complete_redistribute 317 12.2 1.879 1.879 3.619 3.619 dbcsr_copy 1855 11.9 0.208 0.208 3.489 3.489 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.316 3.316 dbcsr_copy_into_existing 22 7.9 3.246 3.246 3.246 3.246 density_rs2pw 107 9.7 0.004 0.004 3.240 3.240 qs_ot_get_p 107 10.4 0.001 0.001 2.999 2.999 copy_dbcsr_to_fm 147 11.2 0.002 0.002 2.901 2.901 fft3d_s 1082 14.6 2.772 2.772 2.777 2.777 build_core_hamiltonian_matrix 11 6.9 0.000 0.000 2.492 2.492 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.277 2.277 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.025 73.363 73.376 qs_mol_dyn_low 1 2.0 0.003 0.003 73.236 73.241 qs_forces 11 3.9 0.001 0.002 73.192 73.193 qs_energies 11 4.9 0.001 0.001 68.420 68.429 scf_env_do_scf 11 5.9 0.000 0.002 63.404 63.404 scf_env_do_scf_inner_loop 96 6.5 0.003 0.019 58.763 58.763 velocity_verlet 10 3.0 0.001 0.003 43.764 43.766 rebuild_ks_matrix 107 8.3 0.000 0.001 30.974 31.029 qs_ks_build_kohn_sham_matrix 107 9.3 0.014 0.018 30.974 31.028 qs_ks_update_qs_env 107 7.6 0.001 0.001 27.304 27.354 sum_up_and_integrate 107 10.3 0.002 0.002 26.929 26.954 integrate_v_rspace 107 11.3 0.004 0.004 26.908 26.936 qs_rho_update_rho_low 107 7.7 0.001 0.001 26.293 26.296 calculate_rho_elec 107 8.7 0.027 0.028 26.292 26.296 grid_integrate_task_list 107 12.3 22.684 23.068 22.684 23.068 grid_collocate_task_list 107 9.7 21.779 22.603 21.779 22.603 dbcsr_multiply_generic 1966 12.4 0.069 0.077 13.511 13.702 qs_scf_new_mos 96 7.5 0.001 0.001 10.882 10.946 qs_scf_loop_do_ot 96 8.5 0.001 0.001 10.881 10.945 ot_scf_mini 96 9.5 0.002 0.002 10.321 10.389 multiply_cannon 1966 13.4 0.121 0.132 9.759 10.175 multiply_cannon_loop 1966 14.4 0.092 0.108 9.227 9.516 mp_waitall_1 136719 16.5 7.394 8.116 7.394 8.116 ot_mini 96 10.5 0.001 0.001 6.286 6.357 multiply_cannon_metrocomm3 15728 15.4 0.042 0.048 5.697 6.268 density_rs2pw 107 9.7 0.004 0.006 4.115 4.884 init_scf_loop 11 6.9 0.000 0.000 4.625 4.626 init_scf_run 11 5.9 0.000 0.006 3.913 3.913 scf_env_initial_rho_setup 11 6.9 0.000 0.004 3.913 3.913 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.794 3.803 pw_transfer 1295 11.6 0.058 0.065 3.595 3.691 wfi_extrapolate 11 7.9 0.001 0.001 3.567 3.568 fft_wrap_pw1pw2 1081 12.6 0.008 0.009 3.475 3.564 multiply_cannon_multrec 15728 15.4 3.137 3.459 3.147 3.469 qs_ot_get_derivative 96 11.5 0.001 0.001 3.290 3.355 transfer_rs2pw 439 10.6 0.005 0.006 2.473 3.262 potential_pw2rs 107 12.3 0.005 0.008 3.235 3.254 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.097 3.212 apply_single 107 13.6 0.000 0.000 3.096 3.211 fft_wrap_pw1pw2_140 439 13.2 0.533 0.641 2.951 3.148 mp_waitany 8968 13.7 2.211 3.114 2.211 3.114 ot_diis_step 96 11.5 0.003 0.003 2.981 2.981 transfer_rs2pw_140 118 11.5 0.147 0.165 1.707 2.496 make_m2s 3932 13.4 0.041 0.051 2.416 2.468 fft3d_ps 1081 14.6 1.157 1.285 2.240 2.398 transfer_pw2rs 439 13.2 0.005 0.007 2.310 2.314 make_images 3932 14.4 0.102 0.108 2.128 2.180 mp_alltoall_d11v 1998 13.7 1.250 1.932 1.250 1.932 rs_gather_matrices 107 12.3 0.064 0.071 0.948 1.591 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=45.702, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=27.314, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.429, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.675, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.48, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.47, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=16.158, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.779, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.684, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.137, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.211, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.394, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.224 0.224 134.492 134.492 qs_energies 1 2.0 0.000 0.000 133.665 133.665 scf_env_do_scf 1 3.0 0.000 0.000 132.310 132.310 qs_ks_update_qs_env 8 5.0 0.000 0.000 126.908 126.908 rebuild_ks_matrix 7 6.0 0.000 0.000 126.852 126.852 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 126.852 126.852 hfx_ks_matrix 7 8.0 0.000 0.000 117.748 117.748 integrate_four_center 7 9.0 1.959 1.959 117.722 117.722 integrate_four_center_main 7 10.0 0.389 0.389 103.618 103.618 integrate_four_center_bin 451 11.0 103.229 103.229 103.229 103.229 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 75.126 75.126 init_scf_loop 1 4.0 0.000 0.000 57.172 57.172 integrate_four_center_load 7 10.0 0.007 0.007 11.839 11.839 hfx_load_balance 1 11.0 0.001 0.001 11.832 11.832 hfx_load_balance_bin 1 12.0 5.923 5.923 5.923 5.923 hfx_load_balance_count 1 12.0 5.891 5.891 5.891 5.891 qs_vxc_create 14 8.0 0.000 0.000 3.215 3.215 xc_vxc_pw_create 14 9.0 0.117 0.117 3.214 3.214 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.207 0.229 128.729 128.741 qs_energies 1 2.0 0.000 0.000 128.353 128.364 scf_env_do_scf 1 3.0 0.000 0.000 128.014 128.015 qs_ks_update_qs_env 8 5.0 0.000 0.000 125.848 125.849 rebuild_ks_matrix 7 6.0 0.000 0.000 125.841 125.841 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 125.841 125.841 hfx_ks_matrix 7 8.0 0.000 0.000 120.206 120.207 integrate_four_center 7 9.0 0.052 0.338 120.197 120.198 integrate_four_center_main 7 10.0 0.003 0.004 101.311 107.732 integrate_four_center_bin 448 11.0 101.307 107.729 101.307 107.729 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 75.992 75.992 init_scf_loop 1 4.0 0.000 0.000 52.020 52.021 mp_sync 56 11.2 6.436 13.035 6.436 13.035 integrate_four_center_load 7 10.0 0.000 0.000 11.783 11.784 hfx_load_balance 1 11.0 0.001 0.001 11.783 11.784 hfx_load_balance_bin 1 12.0 5.797 5.923 5.797 5.923 hfx_load_balance_count 1 12.0 5.789 5.852 5.789 5.852 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=16.87699999999998, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=103.229, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.923, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.891, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.959, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.389, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.224, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.138000000000005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=101.307, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.797, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.789, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.052, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.003, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.207, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=6.436, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.020 77.799 77.799 qs_energies 1 2.0 0.000 0.000 77.424 77.424 mp2_main 1 3.0 0.000 0.000 74.464 74.464 mp2_gpw_main 1 4.0 0.000 0.000 74.361 74.361 rpa_ri_compute_en 1 5.0 0.000 0.000 70.636 70.636 rpa_num_int 1 6.0 0.002 0.002 70.630 70.630 compute_mat_P_omega 1 7.0 0.004 0.004 60.871 60.871 compute_mat_P_omega_contract 10 8.0 8.913 8.913 60.509 60.509 dbt_total 2336 9.6 0.014 0.014 46.696 46.696 dbt_contract 787 11.0 0.042 0.042 39.633 39.633 dbt_tas_total 1149 12.2 0.240 0.240 38.272 38.272 dbt_tas_multiply 807 12.1 0.002 0.002 36.968 36.968 dbt_tas_dbm 807 14.1 0.003 0.003 30.707 30.707 dbm_multiply 807 16.1 30.698 30.698 30.698 30.698 dbt_tas_mm_1N 524 15.1 0.001 0.001 23.544 23.544 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 22.411 22.411 compute_mat_P_omega_calc_M_occ 250 9.0 8.935 8.935 17.068 17.068 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 6.503 6.503 dbt_copy 1103 10.7 0.120 0.120 5.740 5.740 dbt_tas_mm_2 251 15.0 0.001 0.001 5.638 5.638 compute_QP_energies 1 7.0 0.000 0.000 4.898 4.898 compute_self_energy_cubic_gw 1 8.0 0.053 0.053 4.896 4.896 contract_cubic_gw 21 9.0 0.000 0.000 3.916 3.916 dbt_tas_reserve_blocks_index 3261 14.3 0.476 0.476 3.759 3.759 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.718 3.718 dbm_reserve_blocks 3628 15.3 3.347 3.347 3.347 3.347 dbt_reserve_blocks_index 2280 13.1 0.053 0.053 2.877 2.877 scf_env_do_scf 1 3.0 0.000 0.000 2.848 2.848 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.848 2.848 dbt_reserve_blocks_index_array 2222 12.2 0.008 0.008 2.824 2.824 dbt_crop 1042 12.0 1.478 1.478 2.544 2.544 dbt_tas_copy 574 11.4 1.291 1.291 2.222 2.222 convert_to_new_pgrid 2421 14.1 0.074 0.074 2.129 2.129 dbm_copy 1614 15.1 2.056 2.056 2.056 2.056 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.005 2.005 dbt_tas_reshape 367 15.0 0.042 0.042 2.004 2.004 compute_W_cubic_GW 10 7.0 0.004 0.004 1.977 1.977 dbt_reshape 278 11.9 1.073 1.073 1.956 1.956 get_2c_integrals 1 6.0 0.000 0.000 1.699 1.699 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.033 33.772 33.791 qs_energies 1 2.0 0.000 0.000 33.621 33.623 mp2_main 1 3.0 0.000 0.001 32.637 32.639 mp2_gpw_main 1 4.0 0.000 0.000 32.535 32.537 rpa_ri_compute_en 1 5.0 0.000 0.000 31.271 31.272 rpa_num_int 1 6.0 0.000 0.003 31.270 31.271 dbt_total 2336 9.6 0.015 0.015 27.581 27.622 compute_mat_P_omega 1 7.0 0.001 0.007 26.783 26.796 compute_mat_P_omega_contract 10 8.0 0.403 0.428 26.301 26.309 dbt_contract 787 11.0 0.039 0.052 20.269 20.281 dbt_tas_total 1149 12.2 0.079 0.088 17.905 17.906 dbt_tas_multiply 807 12.1 0.002 0.002 17.805 17.809 dbt_tas_dbm 807 14.1 0.003 0.003 12.952 12.961 dbm_multiply 807 16.1 10.223 11.150 10.223 11.150 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.912 7.913 compute_mat_P_omega_calc_M_occ 250 9.0 0.390 0.409 7.661 7.662 dbt_copy 1149 10.8 0.013 0.015 6.164 6.515 mp_sync 8688 11.6 5.186 6.244 5.186 6.244 dbt_tas_mm_2 251 15.0 0.001 0.001 6.208 6.216 dbt_reshape 1136 11.8 2.635 2.784 5.856 6.204 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.319 5.320 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.513 5.024 compute_QP_energies 1 7.0 0.000 0.000 2.847 2.849 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 2.845 2.846 mp_waitall_2 3812 15.3 2.401 2.643 2.401 2.643 dbt_communicate_buffer 1136 12.8 0.052 0.057 2.378 2.565 contract_cubic_gw 21 9.0 0.000 0.000 2.210 2.210 dbt_reserve_blocks_index 2887 13.1 0.067 0.078 1.718 1.977 dbt_reserve_blocks_index_array 2829 12.2 0.008 0.009 1.709 1.968 dbt_tas_reserve_blocks_index 3347 14.5 0.433 0.467 1.699 1.957 dbt_crop 1042 12.0 0.896 1.004 1.432 1.649 dbm_reserve_blocks 3752 15.4 1.355 1.588 1.355 1.588 mp2_ri_gpw_compute_in 1 5.0 0.003 0.004 1.262 1.262 dbt_tas_replicate 405 14.1 0.541 0.713 1.134 1.210 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.102 1.116 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.049 1.053 parallel_gemm_fm 105 8.4 0.000 0.000 1.011 1.018 parallel_gemm_fm_cosma 105 9.4 1.011 1.018 1.011 1.018 scf_env_do_scf 1 3.0 0.000 0.000 0.936 0.936 scf_env_do_scf_inner_loop 17 4.0 0.000 0.002 0.935 0.935 convert_to_new_pgrid 2421 14.1 0.026 0.030 0.779 0.890 dbm_copy 1608 15.1 0.748 0.857 0.748 0.857 mp_max_i 2005 9.8 0.681 0.832 0.681 0.832 mp_sum_l 6165 12.9 0.681 0.799 0.681 0.799 compute_W_cubic_GW 10 7.0 0.001 0.001 0.744 0.751 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=22.777, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=30.698, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.935, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.913, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.347, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.056, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.073, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=10.430999999999997, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.223, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.39, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.403, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.355, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.748, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.635, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.401, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.186, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.022 0.022 234.962 234.962 qs_forces 1 2.0 0.000 0.000 234.435 234.435 rebuild_ks_matrix 7 6.6 0.000 0.000 232.964 232.964 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 232.964 232.964 hfx_ks_matrix 7 8.6 0.000 0.000 231.148 231.148 hfx_ri_update_ks 7 9.6 0.000 0.000 191.438 191.438 hfx_ri_update_ks_Pmat 7 10.6 32.135 32.135 191.435 191.435 dbt_total 823 11.0 0.006 0.006 175.875 175.875 qs_energies 1 3.0 0.000 0.000 170.628 170.628 scf_env_do_scf 1 4.0 0.000 0.000 170.277 170.277 qs_ks_update_qs_env 8 6.0 0.000 0.000 169.208 169.208 dbt_contract 207 12.4 0.064 0.064 156.989 156.989 dbt_tas_total 343 13.7 1.506 1.506 154.859 154.859 dbt_tas_multiply 216 13.5 0.001 0.001 151.692 151.692 dbt_tas_dbm 216 15.5 0.001 0.001 140.040 140.040 dbm_multiply 216 17.5 140.037 140.037 140.037 140.037 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 134.433 134.433 dbt_tas_mm_2 91 16.5 0.001 0.001 126.122 126.122 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 105.187 105.187 init_scf_loop 2 5.0 0.000 0.000 65.088 65.088 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 63.759 63.759 hfx_ri_update_forces 1 7.0 1.657 1.657 39.707 39.707 hfx_ri_forces_Pmat_3c 1 8.0 4.743 4.743 22.261 22.261 dbt_copy 423 11.8 0.046 0.046 15.711 15.711 precalc_derivatives 1 8.0 2.214 2.214 13.467 13.467 dbt_reshape 132 13.2 6.711 6.711 10.839 10.839 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 9.790 9.790 dbt_tas_mm_3T 77 17.1 0.000 0.000 9.554 9.554 dbt_tas_reserve_blocks_index 1297 15.4 0.972 0.972 8.217 8.217 build_3c_derivatives 3 9.0 2.669 2.669 7.587 7.587 dbm_reserve_blocks 1439 16.3 7.437 7.437 7.437 7.437 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 7.146 7.146 dbt_reserve_blocks_index 846 14.4 0.090 0.090 6.346 6.346 dbt_reserve_blocks_index_array 816 13.5 0.007 0.007 6.242 6.242 dbt_crop 372 13.7 2.724 2.724 4.933 4.933 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.030 46.910 46.923 qs_forces 1 2.0 0.000 0.000 46.756 46.756 rebuild_ks_matrix 7 6.6 0.000 0.000 45.986 45.986 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 45.986 45.986 hfx_ks_matrix 7 8.6 0.000 0.000 44.962 44.969 dbt_total 823 11.0 0.006 0.006 39.926 39.934 dbt_contract 207 12.4 0.025 0.026 30.880 30.893 dbt_tas_total 343 13.7 0.075 0.146 27.067 27.067 dbt_tas_multiply 216 13.5 0.001 0.001 26.854 26.855 hfx_ri_update_ks 7 9.6 0.000 0.000 26.220 26.220 hfx_ri_update_ks_Pmat 7 10.6 1.232 1.301 26.216 26.217 qs_energies 1 3.0 0.000 0.000 24.772 24.772 scf_env_do_scf 1 4.0 0.000 0.001 24.624 24.624 qs_ks_update_qs_env 8 6.0 0.000 0.000 24.013 24.013 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 21.974 21.975 dbt_tas_dbm 216 15.5 0.001 0.001 20.786 20.791 dbm_multiply 216 17.5 18.719 19.909 18.719 19.909 hfx_ri_update_forces 1 7.0 0.056 0.060 18.741 18.748 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 13.998 13.998 hfx_ri_forces_Pmat_3c 1 8.0 0.155 0.168 13.964 13.964 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 11.691 11.692 init_scf_loop 2 5.0 0.000 0.000 10.625 10.625 dbt_tas_mm_2 91 16.5 0.001 0.001 9.416 9.421 dbt_copy 539 12.5 0.011 0.013 8.211 8.621 mp_sync 2797 12.9 4.795 6.655 4.795 6.655 dbt_reshape 393 13.9 3.416 3.546 6.306 6.595 dbt_tas_mm_3T 77 17.1 0.000 0.000 5.450 6.192 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 4.850 4.851 dbt_tas_mm_3N 37 15.4 0.000 0.000 4.207 4.317 precalc_derivatives 1 8.0 0.084 0.090 3.599 3.599 dbt_tas_reserve_blocks_index 1450 15.9 0.889 0.927 2.993 3.399 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.187 3.187 mp_waitall_2 1234 16.4 2.698 2.920 2.698 2.920 dbt_reserve_blocks_index 1107 14.8 0.104 0.119 2.466 2.783 dbt_reserve_blocks_index_array 1077 13.9 0.005 0.007 2.440 2.755 dbm_reserve_blocks 1599 16.7 2.298 2.726 2.298 2.726 dbt_crop 372 13.7 1.706 1.776 2.457 2.631 dbt_communicate_buffer 393 14.9 0.013 0.014 1.897 2.080 build_3c_derivatives 3 9.0 0.221 0.237 2.014 2.020 dbt_tas_replicate 149 15.4 0.640 0.679 1.626 1.728 convert_to_new_pgrid 648 15.5 0.041 0.085 1.328 1.720 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.623 1.627 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.617 1.618 dbm_copy 452 16.3 1.160 1.554 1.160 1.554 dbt_tas_copy 146 12.6 0.626 0.677 1.204 1.340 dbt_tas_communicate_buffer 328 16.8 0.011 0.012 0.913 1.000 mp_sum_l 6385 13.7 0.722 0.978 0.722 0.978 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=43.89899999999997, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=140.037, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=32.135, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=7.437, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=6.711, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.743, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=13.596999999999994, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=18.719, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.232, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.298, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.416, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.155, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.698, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=4.795, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.017 186.710 186.710 qs_energies 1 2.0 0.000 0.000 186.529 186.529 mp2_main 1 3.0 0.000 0.000 181.899 181.899 mp2_gpw_main 1 4.0 0.001 0.001 181.503 181.503 mp2_ri_gpw_compute_in 1 5.0 0.385 0.385 135.309 135.309 mp2_ri_gpw_compute_in_loop 1 6.0 0.010 0.010 126.318 126.318 mp2_eri_3c_integrate_gpw 2656 7.0 0.014 0.014 97.933 97.933 integrate_v_rspace 2666 8.0 0.679 0.679 85.043 85.043 grid_integrate_task_list 2666 9.0 82.361 82.361 82.361 82.361 mp2_ri_gpw_compute_en 1 5.0 0.085 0.085 46.169 46.169 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.317 9.317 44.299 44.299 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.012 2.012 27.737 27.737 local_gemm 2080 8.0 25.725 25.725 25.725 25.725 dbcsr_multiply_generic 5322 8.0 0.206 0.206 21.646 21.646 ao_to_mo_and_store_B_mult_1 2656 7.0 0.009 0.009 21.621 21.621 pw_transfer 63872 10.6 0.971 0.971 11.940 11.940 calculate_wavefunction 2656 8.0 7.876 7.876 11.496 11.496 multiply_cannon 5322 9.0 0.477 0.477 10.812 10.812 fft_wrap_pw1pw2 53228 11.4 0.109 0.109 10.755 10.755 multiply_cannon_loop 5322 10.0 0.151 0.151 9.390 9.390 get_2c_integrals 1 6.0 0.000 0.000 8.605 8.605 make_m2s 10644 9.0 0.064 0.064 8.438 8.438 make_images 10644 10.0 3.239 3.239 8.076 8.076 compute_2c_integrals 1 7.0 0.006 0.006 7.747 7.747 compute_2c_integrals_loop_lm 1 8.0 0.012 0.012 7.734 7.734 mp2_eri_2c_integrate_gpw 1 9.0 0.872 0.872 7.722 7.722 fft_wrap_pw1pw2_20 21271 12.4 0.509 0.509 7.718 7.718 multiply_cannon_multrec 5322 11.0 7.628 7.628 7.669 7.669 fft3d_s 53229 13.4 6.675 6.675 6.713 6.713 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.361 2.361 6.645 6.645 mp2_ri_gpw_compute_en_ener 2080 7.0 5.370 5.370 5.370 5.370 copy_dbcsr_to_fm 2679 8.0 0.025 0.025 4.791 4.791 scf_env_do_scf 1 3.0 0.000 0.000 4.218 4.218 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.218 4.218 potential_pw2rs 5322 10.0 0.148 0.148 3.972 3.972 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.029 38.255 38.268 qs_energies 1 2.0 0.000 0.000 38.123 38.123 mp2_main 1 3.0 0.000 0.001 36.237 36.238 mp2_gpw_main 1 4.0 0.001 0.001 36.147 36.147 mp2_ri_gpw_compute_en 1 5.0 0.252 0.259 19.165 19.411 mp2_ri_gpw_compute_en_RI_loop 1 6.0 3.348 3.810 18.051 18.057 mp2_ri_gpw_compute_in 1 5.0 0.043 0.045 16.901 17.195 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 15.659 15.955 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 13.565 13.879 integrate_v_rspace 93 8.1 0.103 0.116 13.442 13.758 grid_integrate_task_list 93 9.1 13.146 13.443 13.146 13.443 mp2_ri_gpw_compute_en_expansio 65 7.0 0.137 0.159 10.732 10.977 local_gemm 65 8.0 10.595 10.825 10.595 10.825 mp2_ri_gpw_compute_en_comm 30 7.0 0.119 0.169 3.584 4.598 mp_sendrecv_dm3 1860 8.0 2.808 4.087 2.808 4.087 dbcsr_multiply_generic 176 8.0 0.008 0.013 1.788 2.133 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.002 1.772 2.116 scf_env_do_scf 1 3.0 0.000 0.000 1.751 1.752 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 1.751 1.752 get_2c_integrals 1 6.0 0.003 0.009 1.176 1.197 multiply_cannon 176 9.0 0.016 0.017 1.050 1.146 multiply_cannon_loop 176 10.0 0.002 0.002 0.995 1.091 make_m2s 352 9.0 0.003 0.003 0.702 0.948 make_images 352 10.0 0.051 0.051 0.690 0.935 multiply_cannon_multrec 246 11.0 0.852 0.905 0.857 0.911 fill_local_i_aL 1920 8.0 0.701 0.891 0.701 0.891 compute_2c_integrals 1 7.0 0.003 0.004 0.843 0.857 qs_scf_new_mos 10 5.0 0.000 0.000 0.811 0.814 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 0.737 0.775 mp2_eri_2c_integrate_gpw 1 9.0 0.194 0.202 0.735 0.774 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=53.80299999999997, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=82.361, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=25.725, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.317, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=7.876, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.628, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.506000000000004, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.146, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=10.595, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=3.348, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.852, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=2.808, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.090 0.090 124.463 124.463 qs_energies 1 2.0 0.000 0.000 123.143 123.143 scf_env_do_scf 1 3.0 0.000 0.000 114.748 114.748 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 114.748 114.748 qs_ks_update_qs_env 15 5.0 0.000 0.000 48.767 48.767 rebuild_ks_matrix 15 6.0 0.000 0.000 48.559 48.559 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 48.559 48.559 qs_scf_new_mos 15 5.0 0.000 0.000 42.502 42.502 eigensolver 15 6.0 0.002 0.002 34.986 34.986 qs_vxc_create 15 8.0 0.037 0.037 33.510 33.510 calculate_dispersion_nonloc 15 9.0 6.921 6.921 29.215 29.215 pw_transfer 1191 10.0 0.062 0.062 23.055 23.055 fft_wrap_pw1pw2 1086 11.0 0.008 0.008 22.857 22.857 cp_fm_diag_elpa 15 7.0 0.000 0.000 21.953 21.953 cp_fm_diag_elpa_base 15 8.0 19.498 19.498 21.953 21.953 qs_rho_update_rho_low 16 5.0 0.000 0.000 19.779 19.779 calculate_rho_elec 16 6.0 0.216 0.216 19.779 19.779 grid_collocate_task_list 16 7.0 18.414 18.414 18.414 18.414 fft_wrap_pw1pw2_150 765 12.0 3.577 3.577 16.653 16.653 sum_up_and_integrate 15 8.0 0.000 0.000 13.846 13.846 integrate_v_rspace 15 9.0 0.018 0.018 13.829 13.829 grid_integrate_task_list 15 10.0 13.309 13.309 13.309 13.309 cp_fm_cholesky_restore 45 7.0 10.774 10.774 10.774 10.774 fft3d_s 1087 13.0 10.458 10.458 10.468 10.468 pw_scatter_s 585 13.1 7.093 7.093 7.093 7.093 fft_wrap_pw1pw2_200 197 12.3 0.755 0.755 6.019 6.019 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.539 5.539 dbcsr_complete_redistribute 46 8.3 2.249 2.249 5.367 5.367 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.791 4.791 gspace_mixing 14 5.0 0.171 0.171 4.758 4.758 cp_fm_upper_to_full 30 8.0 4.712 4.712 4.712 4.712 vdW_energy 15 10.0 4.263 4.263 4.263 4.263 xc_vxc_pw_create 15 9.0 0.212 0.212 4.258 4.258 broyden_mixing 14 6.0 4.148 4.148 4.148 4.148 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 3.604 3.604 init_scf_run 1 3.0 0.001 0.001 3.111 3.111 xc_pw_derive 90 11.0 0.001 0.001 2.745 2.745 build_core_ppnl 1 5.0 2.541 2.541 2.541 2.541 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.027 60.687 60.700 qs_energies 1 2.0 0.000 0.000 60.390 60.399 scf_env_do_scf 1 3.0 0.000 0.001 56.245 56.246 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 56.245 56.246 qs_ks_update_qs_env 15 5.0 0.000 0.000 24.421 24.428 rebuild_ks_matrix 15 6.0 0.000 0.000 24.391 24.397 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 24.391 24.397 qs_rho_update_rho_low 16 5.0 0.000 0.000 18.918 18.924 calculate_rho_elec 16 6.0 0.007 0.007 18.917 18.924 grid_collocate_task_list 16 7.0 17.233 17.488 17.233 17.488 sum_up_and_integrate 15 8.0 0.000 0.001 13.635 13.685 integrate_v_rspace 15 9.0 0.001 0.001 13.627 13.680 qs_scf_new_mos 15 5.0 0.000 0.001 13.379 13.407 grid_integrate_task_list 15 10.0 12.818 13.047 12.818 13.047 eigensolver 15 6.0 0.001 0.002 12.385 12.420 qs_vxc_create 15 8.0 0.001 0.001 10.431 10.445 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.716 8.720 cp_fm_diag_elpa_base 15 8.0 8.580 8.603 8.711 8.713 calculate_dispersion_nonloc 15 9.0 0.901 0.919 8.447 8.474 pw_transfer 1191 10.0 0.081 0.101 8.344 8.412 fft_wrap_pw1pw2 1086 11.0 0.012 0.014 8.188 8.284 fft3d_ps 1086 13.0 2.726 3.100 5.659 6.190 fft_wrap_pw1pw2_150 765 12.0 0.596 0.747 5.451 5.499 cp_fm_cholesky_restore 45 7.0 3.518 3.592 3.518 3.592 mp_alltoall_z22v 1086 15.0 2.403 3.384 2.403 3.384 fft_wrap_pw1pw2_200 197 12.3 0.400 0.495 2.626 2.693 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.653 2.653 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.349 2.557 xc_vxc_pw_create 15 9.0 0.015 0.020 1.983 2.016 yz_to_x 501 13.9 0.182 0.229 1.500 2.014 x_to_yz 585 14.1 0.321 0.352 1.407 1.818 density_rs2pw 16 7.0 0.001 0.001 1.545 1.770 transfer_rs2pw 82 8.0 0.001 0.001 1.238 1.607 build_core_ppnl 1 5.0 1.456 1.585 1.456 1.585 mp_waitany 520 11.3 1.119 1.508 1.119 1.508 xc_pw_derive 90 11.0 0.001 0.002 1.347 1.417 vdW_energy 15 10.0 1.262 1.320 1.262 1.320 init_scf_run 1 3.0 0.000 0.001 1.264 1.265 transfer_rs2pw_200 18 8.8 0.024 0.028 0.836 1.228 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=52.00999999999999, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=19.498, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=18.414, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.309, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.774, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.458, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=15.811999999999998, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.58, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=17.233, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=12.818, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.518, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.726, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.078 0.078 272.309 272.309 qs_energies 1 2.0 0.000 0.000 272.170 272.170 ls_scf 1 3.0 0.000 0.000 270.922 270.922 ls_scf_main 1 4.0 0.002 0.002 261.745 261.745 density_matrix_trs4 11 5.0 0.013 0.013 170.155 170.155 ls_scf_dm_to_ks 11 5.0 0.000 0.000 86.288 86.288 arnoldi_extremal 12 6.1 0.000 0.000 85.817 85.817 arnoldi_normal_ev 12 7.1 0.031 0.031 85.817 85.817 dbcsr_matrix_vector_mult 652 9.0 0.195 0.195 84.251 84.251 build_subspace 23 8.1 0.076 0.076 84.177 84.177 matrix_ls_to_qs 11 6.0 0.000 0.000 83.046 83.046 dbcsr_matrix_vector_mult_local 652 10.0 82.828 82.828 82.838 82.838 dbcsr_multiply_generic 185 6.1 0.856 0.856 73.055 73.055 multiply_cannon 185 7.1 0.400 0.400 44.429 44.429 dbcsr_copy_into_existing 11 7.0 44.240 44.240 44.240 44.240 dbcsr_complete_redistribute 23 7.5 31.330 31.330 42.594 42.594 matrix_decluster 11 7.0 0.000 0.000 38.805 38.805 multiply_cannon_loop 185 8.1 0.210 0.210 32.007 32.007 make_m2s 370 7.1 0.038 0.038 24.468 24.468 make_images 370 8.1 10.751 10.751 22.873 22.873 multiply_cannon_multrec 185 9.1 22.449 22.449 22.572 22.572 dbcsr_finalize 646 7.5 0.218 0.218 14.487 14.487 dbcsr_merge_all 597 8.5 1.977 1.977 13.353 13.353 setup_rec_index_2d 370 8.1 11.830 11.830 11.830 11.830 tree_to_linear_d 110 9.4 10.115 10.115 10.115 10.115 dbcsr_sort_indices 1103 9.9 10.026 10.026 10.026 10.026 calculate_norms 370 9.1 9.224 9.224 9.224 9.224 quick_finalize 395 10.0 0.365 0.365 8.686 8.686 ls_scf_init_scf 1 4.0 0.000 0.000 8.503 8.503 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.165 8.165 dbcsr_special_finalize 370 9.1 0.002 0.002 8.014 8.014 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.515 7.515 matrix_qs_to_ls 12 5.1 0.000 0.000 5.528 5.528 matrix_cluster 12 6.1 0.000 0.000 5.528 5.528 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.032 68.499 68.518 qs_energies 1 2.0 0.000 0.000 68.378 68.379 ls_scf 1 3.0 0.000 0.000 68.331 68.332 ls_scf_main 1 4.0 0.001 0.010 65.535 65.535 density_matrix_trs4 11 5.0 0.006 0.021 63.071 63.137 dbcsr_multiply_generic 185 6.1 0.057 0.068 60.107 60.389 multiply_cannon 185 7.1 0.033 0.036 49.403 50.533 multiply_cannon_loop 185 8.1 0.106 0.120 47.014 48.058 multiply_cannon_multrec 1480 9.1 28.389 31.253 28.628 31.486 mp_waitall_1 11936 10.3 16.601 20.767 16.601 20.767 multiply_cannon_metrocomm3 1480 9.1 0.014 0.017 12.490 17.365 make_m2s 370 7.1 0.034 0.036 6.921 7.006 make_images 370 8.1 0.631 0.680 6.794 6.878 calculate_norms 2960 9.1 4.523 5.079 4.523 5.079 mp_sum_l 1199 5.3 2.943 4.677 2.943 4.677 multiply_cannon_metrocomm1 1480 9.1 0.006 0.007 1.233 3.615 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 2.064 3.537 make_images_data 370 9.1 0.010 0.014 3.132 3.366 hybrid_alltoall_any 393 9.9 0.206 1.057 2.685 2.933 arnoldi_extremal 12 6.1 0.000 0.000 2.458 2.479 arnoldi_normal_ev 12 7.1 0.001 0.008 2.457 2.478 build_subspace 23 8.1 0.019 0.025 2.355 2.357 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.122 2.235 ls_scf_init_scf 1 4.0 0.000 0.000 2.195 2.196 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.168 2.176 dbcsr_complete_redistribute 23 7.5 1.169 1.230 1.937 2.027 dbcsr_matrix_vector_mult 652 9.0 0.013 0.048 1.954 2.012 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.990 1.993 matrix_ls_to_qs 11 6.0 0.000 0.000 1.862 1.953 make_images_pack 370 9.1 1.653 1.831 1.656 1.834 matrix_decluster 11 7.0 0.000 0.000 1.740 1.833 dbcsr_matrix_vector_mult_local 652 10.0 1.581 1.636 1.583 1.638 buffer_matrices_ensure_size 370 8.1 1.286 1.524 1.286 1.524 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=70.40799999999999, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=82.828, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=44.24, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=31.33, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=22.449, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=11.83, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.224, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=11.64, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.581, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.169, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=28.389, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.523, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=16.601, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.943, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.653, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.009 70.407 70.407 lib_test 1 2.0 0.000 0.000 70.397 70.397 dbcsr_run_tests 3 3.0 0.002 0.002 70.397 70.397 test_multiplies_multiproc 3 4.0 0.001 0.001 54.302 54.302 dbcsr_redistribute 9 5.0 35.250 35.250 36.845 36.845 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.094 16.094 dbcsr_make_random_matrix 9 4.0 13.039 13.039 15.987 15.987 multiply_cannon 9 6.0 0.011 0.011 11.340 11.340 multiply_cannon_loop 9 7.0 0.014 0.014 10.967 10.967 multiply_cannon_multrec 9 8.0 10.952 10.952 10.953 10.953 dbcsr_finalize 27 5.7 0.014 0.014 5.420 5.420 dbcsr_merge_all 18 6.5 1.949 1.949 4.719 4.719 dbcsr_data_release 975 7.6 2.712 2.712 2.712 2.712 tree_to_linear_d 9 7.0 1.884 1.884 1.884 1.884 make_m2s 18 6.0 0.001 0.001 1.743 1.743 make_images 18 7.0 0.626 0.626 1.691 1.691 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.021 18.592 18.599 lib_test 1 2.0 0.000 0.000 18.553 18.574 dbcsr_run_tests 3 3.0 0.000 0.001 18.547 18.568 test_multiplies_multiproc 3 4.0 0.000 0.002 17.701 17.759 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.068 16.168 multiply_cannon 9 6.0 0.002 0.002 14.090 14.425 multiply_cannon_loop 9 7.0 0.002 0.002 13.823 14.160 multiply_cannon_multrec 72 8.0 10.471 11.157 10.471 11.157 mp_waitall_1 576 9.2 3.670 4.704 3.670 4.704 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 3.209 4.272 mp_sum_l 470 2.5 0.810 1.370 0.810 1.370 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.789 1.350 dbcsr_make_random_matrix 9 4.0 0.675 1.015 0.808 1.071 dbcsr_data_release 444 7.6 0.613 0.747 0.613 0.747 make_m2s 18 6.0 0.001 0.001 0.673 0.722 make_images 18 7.0 0.021 0.022 0.670 0.719 dbcsr_destroy 111 5.9 0.000 0.000 0.541 0.651 dbcsr_finalize 27 5.7 0.000 0.000 0.512 0.618 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.138 0.612 dbcsr_redistribute 9 5.0 0.227 0.273 0.523 0.559 dbcsr_merge_all 18 6.5 0.078 0.094 0.439 0.519 make_images_data 18 8.0 0.001 0.001 0.356 0.421 hybrid_alltoall_any 18 9.0 0.030 0.130 0.302 0.388 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.5049999999999955, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=35.25, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=13.039, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=10.952, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.712, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=1.949, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.048000000000002, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.227, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.675, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=10.471, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.613, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.078, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.67, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.81, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.057 0.057 130.576 130.576 qs_mol_dyn_low 1 2.0 0.004 0.004 129.179 129.179 velocity_verlet 5 3.0 0.003 0.003 105.183 105.183 qmmm_el_coupling 6 3.8 0.000 0.000 87.228 87.228 qmmm_elec_with_gaussian 6 4.8 0.012 0.012 87.224 87.224 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 86.609 86.609 qmmm_elec_gaussian_low_G 6 6.8 85.704 85.704 85.704 85.704 qs_forces 6 3.8 0.001 0.001 33.743 33.743 qs_energies 6 4.8 0.000 0.000 29.916 29.916 scf_env_do_scf 6 5.8 0.001 0.001 27.627 27.627 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 24.041 24.041 rebuild_ks_matrix 45 8.4 0.000 0.000 23.063 23.063 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 23.063 23.063 qs_ks_update_qs_env 45 7.8 0.000 0.000 19.659 19.659 pw_transfer 966 12.3 0.052 0.052 16.955 16.955 fft_wrap_pw1pw2 801 13.6 0.006 0.006 16.755 16.755 fft_wrap_pw1pw2_150 507 15.2 2.298 2.298 16.348 16.348 qs_vxc_create 45 10.4 0.001 0.001 12.965 12.965 xc_vxc_pw_create 45 11.4 0.647 0.647 12.965 12.965 xc_pw_derive 270 13.4 0.002 0.002 9.128 9.128 fft3d_s 802 15.6 7.810 7.810 7.818 7.818 qs_rho_update_rho_low 45 7.9 0.000 0.000 7.035 7.035 calculate_rho_elec 45 8.9 0.564 0.564 7.035 7.035 xc_rho_set_and_dset_create 45 12.4 0.555 0.555 6.708 6.708 pw_scatter_s 429 15.8 5.604 5.604 5.604 5.604 xc_pw_divergence 45 12.4 0.001 0.001 5.564 5.564 qmmm_forces 6 3.8 0.001 0.001 5.139 5.139 qmmm_forces_with_gaussian 6 4.8 0.017 0.017 4.816 4.816 pw_integral_ab 2539 7.4 4.328 4.328 4.328 4.328 qs_ks_ddapc 45 10.4 0.001 0.001 4.257 4.257 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.098 4.098 init_scf_loop 6 6.8 0.000 0.000 3.577 3.577 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.412 3.412 qmmm_forces_gaussian_low_G 6 6.8 3.401 3.401 3.401 3.401 density_rs2pw 45 9.9 0.002 0.002 3.245 3.245 grid_collocate_task_list 45 9.9 3.227 3.227 3.227 3.227 sum_up_and_integrate 45 10.4 0.000 0.000 2.953 2.953 integrate_v_rspace 45 11.4 0.007 0.007 2.922 2.922 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.048 56.370 56.384 qs_mol_dyn_low 1 2.0 0.003 0.005 54.999 55.062 qs_forces 6 3.8 0.001 0.001 40.836 40.837 qs_energies 6 4.8 0.000 0.000 38.986 38.986 scf_env_do_scf 6 5.8 0.000 0.001 38.006 38.006 scf_env_do_scf_inner_loop 113 6.2 0.002 0.017 36.446 36.447 rebuild_ks_matrix 119 8.1 0.000 0.000 26.882 26.891 qs_ks_build_kohn_sham_matrix 119 9.1 0.014 0.016 26.881 26.890 qs_ks_update_qs_env 119 7.3 0.001 0.001 25.318 25.326 velocity_verlet 5 3.0 0.002 0.004 22.773 22.776 pw_transfer 2446 12.3 0.145 0.182 19.085 19.571 fft_wrap_pw1pw2 2059 13.4 0.021 0.028 18.729 19.256 fft_wrap_pw1pw2_150 1321 14.9 2.587 3.141 18.037 18.583 qs_vxc_create 119 10.1 0.002 0.002 14.429 14.433 xc_vxc_pw_create 119 11.1 0.139 0.206 14.427 14.432 fft3d_ps 2059 15.4 6.877 7.936 12.592 14.042 xc_pw_derive 714 13.1 0.010 0.012 11.076 11.332 qs_rho_update_rho_low 119 7.3 0.001 0.001 10.738 10.739 calculate_rho_elec 119 8.3 0.050 0.059 10.738 10.739 sum_up_and_integrate 119 10.1 0.002 0.002 9.094 9.117 integrate_v_rspace 119 11.1 0.003 0.003 9.050 9.073 xc_pw_divergence 119 12.1 0.005 0.006 7.157 7.392 qmmm_forces 6 3.8 0.002 0.002 7.194 7.194 xc_rho_set_and_dset_create 119 12.1 0.309 0.379 6.878 7.088 qmmm_forces_with_gaussian 6 4.8 0.006 0.007 6.747 7.041 mp_alltoall_z22v 2059 17.4 4.505 6.699 4.505 6.699 density_rs2pw 119 9.3 0.006 0.008 6.282 6.456 qmmm_el_coupling 6 3.8 0.000 0.000 6.033 6.225 qmmm_elec_with_gaussian 6 4.8 0.003 0.004 6.032 6.223 potential_pw2rs 119 12.1 0.005 0.006 5.484 5.500 grid_collocate_task_list 119 9.3 4.238 4.538 4.238 4.538 x_to_yz 1095 16.8 0.691 0.779 3.019 3.996 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.840 3.981 yz_to_x 964 16.0 0.465 0.615 2.641 3.798 transfer_pw2rs 500 12.8 0.005 0.007 3.654 3.682 mp_waitany 4028 12.8 2.641 3.538 2.641 3.538 grid_integrate_task_list 119 12.1 3.298 3.479 3.298 3.479 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.302 3.426 qmmm_forces_gaussian_low_G 6 6.8 3.159 3.298 3.159 3.298 transfer_rs2pw 488 10.2 0.006 0.009 2.791 3.116 qmmm_elec_gaussian_low_G 6 6.8 2.711 2.835 2.711 2.835 pw_restrict_s3 18 5.8 1.295 1.372 2.355 2.612 transfer_pw2rs_150 125 13.9 0.734 0.941 2.352 2.425 transfer_rs2pw_150 125 11.2 0.588 0.807 1.960 2.314 qs_scf_new_mos 113 7.2 0.000 0.000 2.243 2.249 qs_scf_loop_do_ot 113 8.2 0.000 0.001 2.243 2.249 pw_gather_p 964 15.0 1.830 2.239 1.830 2.239 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 1.980 2.170 pw_prolongate_s3 18 6.8 1.101 1.164 1.980 2.170 ot_scf_mini 113 9.2 0.001 0.001 2.162 2.168 qs_ks_ddapc 119 10.1 0.002 0.003 2.044 2.158 dbcsr_multiply_generic 2588 12.3 0.058 0.059 1.864 1.891 mp_sum_d 5822 12.2 0.948 1.711 0.948 1.711 pw_scatter_p 1095 15.8 1.663 1.697 1.663 1.697 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.577 1.578 init_scf_loop 6 6.8 0.000 0.000 1.556 1.556 pw_integral_ab 2761 7.7 1.040 1.214 1.394 1.533 mp_waitall_1 177795 16.4 1.090 1.282 1.090 1.282 mp_sum_dm3 33 5.7 1.221 1.269 1.221 1.269 qmmm_env_create 1 2.0 0.025 0.029 1.250 1.252 ot_mini 113 10.2 0.001 0.001 1.189 1.194 transfer_pw2rs_40 119 14.1 0.265 0.326 1.044 1.172 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=20.501999999999995, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=85.704, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.81, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.604, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.328, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.401, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.227, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=30.541999999999998, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.711, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.04, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.159, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.238, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.298, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.505, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.877, yerr=0.0 Summary: Performance test took 33 minutes. Status: OK Removing intermediate container 8b14f0d6aac6 ---> eb76de3f30fd Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in b42b725cfb98 Removing intermediate container b42b725cfb98 ---> e18960ab6066 Step 42/42 : ENTRYPOINT [] ---> Running in bedb21c754f0 Removing intermediate container bedb21c754f0 ---> 056e4142d232 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 056e4142d232 Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2023-10-25 08:50:08+00:00