StartDate: 2023-01-26 19:11:16+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: 452b3252e6f2376ff1818a3a99c9ccc117ee600e CommitTime: 2023-01-26 16:07:07 +0100 CommitAuthor: mattiatj CommitSubject: Refactor allocations of imaginary parts for RTP (#2531) Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=452b3252e6f2376ff1818a3a99c9ccc117ee600e Sending build context to Docker daemon 366.6MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 6e3729cf69e0: Already exists Digest: sha256:27cb6e6ccef575a4698b66f5de06c7ecd61589132d5a91d098f7f3f9285415a9 Status: Downloaded newer image for ubuntu:22.04 ---> 6b7dfa7e8fdb Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> e98199835f6f Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> ccd07bf8588a Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 0aa7ad9d2809 Step 5/42 : RUN mkdir scripts ---> Using cache ---> e9459f85bb88 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 47c0caf3dab5 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 19088ac45e1f Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> 92805a05cd8b Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> d51b4a6b3b7e Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> a18c614ddaa6 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 4fea06abe960 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> fa60034e8c40 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> fd68e6f2b3d3 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 0ba9822292ef Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> ef29664ef50b Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 8d51dd88f640 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> d71174547fe5 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 42542bcbda3b Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 0a417aca997e Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> efd49f9102c1 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 9064d449ccf0 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> c9af7766876b Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> a850ca552dea Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 4ad597af4959 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 93920de8ee90 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> add3a812b5ee Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> fe8761c7db95 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 9b821ac6a470 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> 777ce1207ae1 Step 30/42 : COPY ./Makefile . ---> Using cache ---> ea891a5b8d5f Step 31/42 : COPY ./src ./src ---> Using cache ---> 7c73a2cd42db Step 32/42 : COPY ./exts ./exts ---> Using cache ---> 84efc3623907 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> 42cfb605f0ae Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in bc319f7d0bce './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container bc319f7d0bce ---> be4d1f8527d4 Step 35/42 : COPY ./data ./data ---> bf5391321e7e Step 36/42 : COPY ./tests ./tests ---> 8fc747a6b9c2 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 2ab5e7693264 Step 38/42 : COPY ./benchmarks ./benchmarks ---> 624661ae2ceb Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 9e82cb9c7246 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 8c86fd98b9f3 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 69 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.032 0.032 89.331 89.331 qs_mol_dyn_low 1 2.0 0.003 0.003 88.671 88.671 qs_forces 11 3.9 0.001 0.001 88.631 88.631 qs_energies 11 4.9 0.001 0.001 82.526 82.526 scf_env_do_scf 11 5.9 0.001 0.001 71.851 71.851 velocity_verlet 10 3.0 0.002 0.002 58.024 58.024 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 54.783 54.783 rebuild_ks_matrix 119 8.3 0.001 0.001 20.405 20.405 qs_ks_build_kohn_sham_matrix 119 9.3 0.013 0.013 20.404 20.404 dbcsr_multiply_generic 2286 12.5 0.164 0.164 20.042 20.042 qs_scf_new_mos 108 7.5 0.001 0.001 19.958 19.958 qs_scf_loop_do_ot 108 8.5 0.001 0.001 19.958 19.958 qs_rho_update_rho_low 119 7.7 0.001 0.001 19.888 19.888 calculate_rho_elec 119 8.7 0.955 0.955 19.887 19.887 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.766 18.766 ot_scf_mini 108 9.5 0.002 0.002 18.522 18.522 init_scf_loop 11 6.9 0.000 0.000 16.918 16.918 grid_collocate_task_list 119 9.7 15.388 15.388 15.388 15.388 prepare_preconditioner 11 7.9 0.000 0.000 14.472 14.472 make_preconditioner 11 8.9 0.000 0.000 14.472 14.472 make_full_inverse_cholesky 11 9.9 0.000 0.000 13.370 13.370 sum_up_and_integrate 119 10.3 0.824 0.824 13.332 13.332 integrate_v_rspace 119 11.3 0.093 0.093 12.508 12.508 ot_mini 108 10.5 0.001 0.001 12.036 12.036 make_m2s 4572 13.5 0.047 0.047 10.982 10.982 grid_integrate_task_list 119 12.3 10.571 10.571 10.571 10.571 qs_ot_get_derivative 108 11.5 0.001 0.001 6.366 6.366 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.305 6.305 pw_transfer 1439 11.6 0.068 0.068 6.043 6.043 dbcsr_make_dense_low 5837 15.5 0.073 0.073 6.032 6.032 make_dense_data 5837 16.5 5.401 5.401 5.945 5.945 fft_wrap_pw1pw2 1201 12.6 0.007 0.007 5.831 5.831 ot_diis_step 108 11.5 0.004 0.004 5.667 5.667 make_images 4572 14.5 2.174 2.174 5.459 5.459 dbcsr_make_images_dense 3978 14.8 0.019 0.019 5.223 5.223 multiply_cannon 2286 13.5 0.193 0.193 5.132 5.132 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.128 5.128 apply_single 119 13.6 0.000 0.000 5.128 5.128 fft_wrap_pw1pw2_140 487 13.2 0.438 0.438 5.037 5.037 cp_fm_cholesky_decompose 22 10.9 4.933 4.933 4.933 4.933 multiply_cannon_loop 2286 14.5 0.040 0.040 4.639 4.639 multiply_cannon_multrec 2286 15.5 4.550 4.550 4.598 4.598 cp_fm_cholesky_invert 11 10.9 4.290 4.290 4.290 4.290 init_scf_run 11 5.9 0.002 0.002 3.776 3.776 scf_env_initial_rho_setup 11 6.9 0.001 0.001 3.774 3.774 dbcsr_copy 2102 12.0 0.225 0.225 3.737 3.737 dbcsr_complete_redistribute 329 12.2 1.920 1.920 3.604 3.604 density_rs2pw 119 9.7 0.005 0.005 3.545 3.545 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.488 3.488 dbcsr_copy_into_existing 22 7.9 3.477 3.477 3.478 3.478 wfi_extrapolate 11 7.9 0.001 0.001 3.316 3.316 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.289 3.289 copy_dbcsr_to_fm 153 11.3 0.003 0.003 3.005 3.005 qs_ot_get_p 119 10.4 0.001 0.001 2.935 2.935 qs_create_task_list 11 7.9 0.000 0.000 2.911 2.911 generate_qs_task_list 11 8.9 1.967 1.967 2.911 2.911 fft3d_s 1202 14.6 2.839 2.839 2.844 2.844 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.615 2.615 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.586 2.586 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.364 2.364 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.040 2.040 pw_poisson_solve 119 10.3 0.356 0.356 2.014 2.014 dbcsr_data_release 279534 16.0 1.984 1.984 1.984 1.984 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 1.948 1.948 potential_pw2rs 119 12.3 0.048 0.048 1.843 1.843 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.021 47.707 47.717 qs_mol_dyn_low 1 2.0 0.003 0.004 47.579 47.583 qs_forces 11 3.9 0.001 0.001 47.538 47.538 qs_energies 11 4.9 0.001 0.001 44.275 44.277 scf_env_do_scf 11 5.9 0.000 0.002 40.594 40.595 scf_env_do_scf_inner_loop 108 6.5 0.003 0.022 37.496 37.496 velocity_verlet 10 3.0 0.002 0.003 28.869 28.870 rebuild_ks_matrix 119 8.3 0.001 0.001 17.240 17.332 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.017 17.240 17.332 qs_ks_update_qs_env 119 7.6 0.001 0.001 15.249 15.333 qs_rho_update_rho_low 119 7.7 0.001 0.001 14.790 14.809 calculate_rho_elec 119 8.7 0.030 0.031 14.789 14.808 dbcsr_multiply_generic 2286 12.5 0.073 0.079 13.087 13.361 sum_up_and_integrate 119 10.3 0.034 0.039 12.936 12.981 integrate_v_rspace 119 11.3 0.004 0.005 12.902 12.951 qs_scf_new_mos 108 7.5 0.001 0.001 10.357 10.582 qs_scf_loop_do_ot 108 8.5 0.001 0.001 10.357 10.581 multiply_cannon 2286 13.5 0.154 0.170 9.579 9.919 ot_scf_mini 108 9.5 0.002 0.002 9.675 9.884 grid_collocate_task_list 119 9.7 9.241 9.544 9.241 9.544 multiply_cannon_loop 2286 14.5 0.088 0.100 9.022 9.420 grid_integrate_task_list 119 12.3 8.163 8.464 8.163 8.464 mp_waitall_1 158411 16.6 7.251 8.030 7.251 8.030 ot_mini 108 10.5 0.001 0.001 5.647 5.869 rs_pw_transfer 974 11.9 0.011 0.014 5.278 5.775 multiply_cannon_metrocomm3 18288 15.5 0.035 0.039 5.030 5.698 density_rs2pw 119 9.7 0.005 0.006 4.940 5.414 pw_transfer 1439 11.6 0.103 0.120 4.279 4.362 fft_wrap_pw1pw2 1201 12.6 0.009 0.011 4.104 4.180 fft_wrap_pw1pw2_140 487 13.2 0.271 0.323 3.496 3.691 potential_pw2rs 119 12.3 0.006 0.007 3.451 3.472 multiply_cannon_multrec 18288 15.5 3.171 3.421 3.181 3.433 fft3d_ps 1201 14.6 1.274 1.461 3.200 3.303 qs_ot_get_derivative 108 11.5 0.001 0.001 2.889 3.087 init_scf_loop 11 6.9 0.000 0.000 3.084 3.085 mp_waitany 9880 13.7 2.383 3.061 2.383 3.061 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.648 2.742 apply_single 119 13.6 0.000 0.000 2.648 2.742 ot_diis_step 108 11.5 0.003 0.004 2.725 2.725 init_scf_run 11 5.9 0.000 0.004 2.596 2.596 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.595 2.596 rs_pw_transfer_RS2PW_140 130 11.5 0.179 0.208 2.004 2.496 make_m2s 4572 13.5 0.045 0.050 2.330 2.406 wfi_extrapolate 11 7.9 0.001 0.001 2.355 2.355 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.138 2.149 make_images 4572 14.5 0.116 0.118 2.002 2.076 mp_alltoall_d11v 2130 13.8 1.459 1.854 1.459 1.854 rs_gather_matrices 119 12.3 0.104 0.117 1.227 1.571 mp_alltoall_z22v 1201 16.6 1.150 1.504 1.150 1.504 qs_ot_get_p 119 10.4 0.001 0.002 1.243 1.467 mp_sum_l 11218 13.2 0.735 1.331 0.735 1.331 make_images_data 4572 15.5 0.034 0.041 1.120 1.293 rs_pw_transfer_PW2RS_140 130 13.9 0.345 0.434 1.221 1.280 multiply_cannon_metrocomm1 18288 15.5 0.018 0.021 0.535 1.247 prepare_preconditioner 11 7.9 0.000 0.000 1.204 1.232 make_preconditioner 11 8.9 0.000 0.000 1.204 1.232 hybrid_alltoall_any 4725 16.4 0.059 0.216 0.970 1.120 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.095 1.113 mp_sum_d 4129 12.0 0.740 1.104 0.740 1.104 yz_to_x 487 15.3 0.323 0.401 0.958 1.090 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.921 1.026 x_to_yz 476 15.9 0.412 0.494 0.922 1.017 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.918 1.010 rs_pw_transfer_PW2RS_50 119 14.3 0.291 0.353 0.855 0.976 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.887 0.967 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=48.48800000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.388, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.571, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.401, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.933, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.55, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=17.498, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.241, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.163, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.171, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.383, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.251, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.028 0.028 112.325 112.325 qs_mol_dyn_low 1 2.0 0.003 0.003 111.648 111.648 qs_forces 11 3.9 0.001 0.001 111.609 111.609 qs_energies 11 4.9 0.001 0.001 103.951 103.951 scf_env_do_scf 11 5.9 0.001 0.001 91.208 91.208 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 72.910 72.910 velocity_verlet 10 3.0 0.002 0.002 71.760 71.760 rebuild_ks_matrix 107 8.3 0.001 0.001 33.620 33.620 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 33.619 33.619 qs_rho_update_rho_low 107 7.7 0.001 0.001 31.869 31.869 calculate_rho_elec 107 8.7 0.857 0.857 31.869 31.869 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.205 30.205 grid_collocate_task_list 107 9.7 27.660 27.660 27.660 27.660 sum_up_and_integrate 107 10.3 0.716 0.716 27.110 27.110 integrate_v_rspace 107 11.3 0.089 0.089 26.393 26.393 grid_integrate_task_list 107 12.3 24.556 24.556 24.556 24.556 init_scf_loop 11 6.9 0.000 0.000 18.154 18.154 dbcsr_multiply_generic 1966 12.4 0.135 0.135 17.925 17.925 qs_scf_new_mos 96 7.5 0.001 0.001 17.607 17.607 qs_scf_loop_do_ot 96 8.5 0.001 0.001 17.607 17.607 ot_scf_mini 96 9.5 0.002 0.002 16.423 16.423 prepare_preconditioner 11 7.9 0.000 0.000 14.076 14.076 make_preconditioner 11 8.9 0.000 0.000 14.076 14.076 make_full_inverse_cholesky 11 9.9 0.000 0.000 12.936 12.936 ot_mini 96 10.5 0.001 0.001 10.715 10.715 make_m2s 3932 13.4 0.040 0.040 9.825 9.825 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.665 6.665 pw_transfer 1295 11.6 0.059 0.059 5.752 5.752 qs_ot_get_derivative 96 11.5 0.001 0.001 5.639 5.639 fft_wrap_pw1pw2 1081 12.6 0.006 0.006 5.570 5.570 init_scf_run 11 5.9 0.002 0.002 5.423 5.423 scf_env_initial_rho_setup 11 6.9 0.000 0.000 5.422 5.422 dbcsr_make_dense_low 4961 15.5 0.108 0.108 5.353 5.353 make_dense_data 4961 16.5 4.608 4.608 5.233 5.233 ot_diis_step 96 11.5 0.003 0.003 5.073 5.073 fft_wrap_pw1pw2_140 439 13.2 0.436 0.436 4.856 4.856 wfi_extrapolate 11 7.9 0.001 0.001 4.795 4.795 make_images 3932 14.4 1.890 1.890 4.770 4.770 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.696 4.696 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.651 4.651 apply_single 107 13.6 0.000 0.000 4.650 4.650 multiply_cannon 1966 13.4 0.179 0.179 4.642 4.642 cp_fm_cholesky_decompose 22 10.9 4.544 4.544 4.544 4.544 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.275 4.275 cp_fm_cholesky_invert 11 10.9 4.216 4.216 4.216 4.216 multiply_cannon_loop 1966 14.4 0.055 0.055 4.211 4.211 multiply_cannon_multrec 1966 15.4 4.112 4.112 4.155 4.155 dbcsr_complete_redistribute 317 12.2 1.896 1.896 3.789 3.789 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.741 3.741 dbcsr_copy 1855 11.9 0.206 0.206 3.506 3.506 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.381 3.381 density_rs2pw 107 9.7 0.004 0.004 3.352 3.352 qs_create_task_list 11 7.9 0.000 0.000 3.349 3.349 generate_qs_task_list 11 8.9 2.400 2.400 3.349 3.349 dbcsr_copy_into_existing 22 7.9 3.271 3.271 3.271 3.271 copy_dbcsr_to_fm 147 11.2 0.002 0.002 3.005 3.005 fft3d_s 1082 14.6 2.795 2.795 2.815 2.815 qs_ot_get_p 107 10.4 0.001 0.001 2.607 2.607 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.509 2.509 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.322 2.322 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.022 72.391 72.402 qs_mol_dyn_low 1 2.0 0.003 0.003 72.265 72.270 qs_forces 11 3.9 0.001 0.001 72.225 72.225 qs_energies 11 4.9 0.001 0.001 67.431 67.433 scf_env_do_scf 11 5.9 0.000 0.002 62.470 62.471 scf_env_do_scf_inner_loop 96 6.5 0.003 0.017 57.845 57.845 velocity_verlet 10 3.0 0.002 0.003 42.851 42.852 rebuild_ks_matrix 107 8.3 0.000 0.001 31.133 31.185 qs_ks_build_kohn_sham_matrix 107 9.3 0.014 0.015 31.133 31.184 qs_ks_update_qs_env 107 7.6 0.001 0.001 27.440 27.486 sum_up_and_integrate 107 10.3 0.033 0.038 27.159 27.198 integrate_v_rspace 107 11.3 0.004 0.005 27.125 27.168 qs_rho_update_rho_low 107 7.7 0.001 0.001 26.712 26.721 calculate_rho_elec 107 8.7 0.027 0.030 26.712 26.720 grid_integrate_task_list 107 12.3 22.945 23.354 22.945 23.354 grid_collocate_task_list 107 9.7 22.148 22.544 22.148 22.544 dbcsr_multiply_generic 1966 12.4 0.065 0.068 12.192 12.342 qs_scf_new_mos 96 7.5 0.001 0.001 9.504 9.564 qs_scf_loop_do_ot 96 8.5 0.001 0.001 9.504 9.563 multiply_cannon 1966 13.4 0.136 0.148 8.952 9.198 ot_scf_mini 96 9.5 0.002 0.002 8.927 8.989 multiply_cannon_loop 1966 14.4 0.083 0.094 8.428 8.705 mp_waitall_1 136719 16.5 6.887 7.553 6.887 7.553 multiply_cannon_metrocomm3 15728 15.4 0.032 0.036 4.842 5.507 ot_mini 96 10.5 0.001 0.001 5.245 5.316 rs_pw_transfer 878 11.9 0.010 0.011 4.396 4.955 init_scf_loop 11 6.9 0.000 0.000 4.612 4.613 density_rs2pw 107 9.7 0.005 0.005 4.010 4.562 pw_transfer 1295 11.6 0.095 0.114 3.894 3.960 init_scf_run 11 5.9 0.000 0.004 3.903 3.903 scf_env_initial_rho_setup 11 6.9 0.000 0.003 3.903 3.903 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.818 3.827 fft_wrap_pw1pw2 1081 12.6 0.009 0.010 3.734 3.795 wfi_extrapolate 11 7.9 0.001 0.001 3.553 3.554 fft_wrap_pw1pw2_140 439 13.2 0.248 0.293 3.226 3.368 potential_pw2rs 107 12.3 0.006 0.007 3.244 3.268 multiply_cannon_multrec 15728 15.4 2.836 3.065 2.845 3.075 fft3d_ps 1081 14.6 1.158 1.314 2.906 3.005 qs_ot_get_derivative 96 11.5 0.001 0.001 2.732 2.791 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.477 2.549 apply_single 107 13.6 0.000 0.000 2.477 2.549 ot_diis_step 96 11.5 0.003 0.003 2.494 2.494 mp_waitany 8968 13.7 1.655 2.265 1.655 2.265 make_m2s 3932 13.4 0.040 0.042 2.156 2.221 make_images 3932 14.4 0.104 0.106 1.864 1.925 rs_pw_transfer_RS2PW_140 118 11.5 0.181 0.211 1.289 1.856 mp_alltoall_d11v 1998 13.7 1.094 1.779 1.094 1.779 rs_gather_matrices 107 12.3 0.104 0.116 0.878 1.514 mp_alltoall_z22v 1081 16.6 1.040 1.449 1.040 1.449 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=42.62900000000002, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=27.66, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.556, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.608, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.544, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=4.216, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.112, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=15.920000000000002, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=22.148, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.945, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.836, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=6.887, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.655, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.189 0.189 106.725 106.725 qs_energies 1 2.0 0.000 0.000 105.856 105.856 scf_env_do_scf 1 3.0 0.000 0.000 104.661 104.661 qs_ks_update_qs_env 8 5.0 0.000 0.000 99.556 99.556 rebuild_ks_matrix 7 6.0 0.000 0.000 99.501 99.501 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 99.501 99.501 hfx_ks_matrix 7 8.0 0.000 0.000 90.442 90.442 integrate_four_center 7 9.0 1.677 1.677 90.413 90.413 integrate_four_center_main 7 10.0 0.764 0.764 81.551 81.551 integrate_four_center_bin 449 11.0 80.787 80.787 80.787 80.787 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 56.846 56.846 init_scf_loop 1 4.0 0.000 0.000 47.805 47.805 integrate_four_center_load 7 10.0 0.000 0.000 6.937 6.937 hfx_load_balance 1 11.0 0.002 0.002 6.937 6.937 hfx_load_balance_bin 1 12.0 3.461 3.461 3.461 3.461 hfx_load_balance_count 1 12.0 3.457 3.457 3.457 3.457 qs_vxc_create 14 8.0 0.000 0.000 3.237 3.237 xc_vxc_pw_create 14 9.0 0.184 0.184 3.237 3.237 calculate_rho_elec 15 7.4 0.118 0.118 2.480 2.480 prepare_preconditioner 1 5.0 0.000 0.000 2.416 2.416 make_preconditioner 1 6.0 0.000 0.000 2.416 2.416 xc_rho_set_and_dset_create 14 10.0 0.109 0.109 2.406 2.406 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 2.252 2.252 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.220 0.242 96.139 96.151 qs_energies 1 2.0 0.000 0.000 95.748 95.756 scf_env_do_scf 1 3.0 0.000 0.000 95.413 95.414 qs_ks_update_qs_env 8 5.0 0.000 0.000 93.423 93.423 rebuild_ks_matrix 7 6.0 0.000 0.000 93.415 93.415 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 93.415 93.415 hfx_ks_matrix 7 8.0 0.000 0.000 87.969 87.971 integrate_four_center 7 9.0 0.051 0.331 87.960 87.961 integrate_four_center_main 7 10.0 0.003 0.003 79.790 80.897 integrate_four_center_bin 448 11.0 79.787 80.894 79.787 80.894 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 53.191 53.191 init_scf_loop 1 4.0 0.000 0.000 42.221 42.233 integrate_four_center_load 7 10.0 0.000 0.000 5.683 5.684 hfx_load_balance 1 11.0 0.001 0.001 5.683 5.684 mp_sync 70 11.3 1.820 3.342 1.820 3.342 hfx_load_balance_count 1 12.0 2.770 2.841 2.770 2.841 hfx_load_balance_bin 1 12.0 2.775 2.838 2.775 2.838 qs_vxc_create 14 8.0 0.000 0.000 2.491 2.491 xc_vxc_pw_create 14 9.0 0.007 0.008 2.491 2.491 xc_rho_set_and_dset_create 14 10.0 0.010 0.012 1.962 2.078 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=16.39, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=80.787, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.461, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.457, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.677, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.764, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.189, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=8.712999999999994, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=79.787, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.775, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.77, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.051, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.003, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.22, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=1.82, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 79.798 79.798 qs_energies 1 2.0 0.000 0.000 79.390 79.390 mp2_main 1 3.0 0.000 0.000 76.615 76.615 mp2_gpw_main 1 4.0 0.000 0.000 76.512 76.512 rpa_ri_compute_en 1 5.0 0.000 0.000 72.816 72.816 rpa_num_int 1 6.0 0.001 0.001 72.810 72.810 compute_mat_P_omega 1 7.0 0.003 0.003 62.509 62.509 compute_mat_P_omega_contract 10 8.0 8.843 8.843 62.312 62.312 dbt_total 2336 9.6 0.011 0.011 48.955 48.955 dbt_contract 787 11.0 0.034 0.034 42.186 42.186 dbt_tas_total 1149 12.2 0.189 0.189 41.255 41.255 dbt_tas_multiply 807 12.1 0.002 0.002 39.905 39.905 dbt_tas_dbm 807 14.1 0.003 0.003 33.368 33.368 dbm_multiply 807 16.1 33.360 33.360 33.360 33.360 dbt_tas_mm_1N 524 15.1 0.001 0.001 24.782 24.782 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 23.419 23.419 compute_mat_P_omega_calc_M_occ 250 9.0 8.857 8.857 16.546 16.546 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.008 8.008 dbt_tas_mm_2 251 15.0 0.001 0.001 6.969 6.969 dbt_copy 1103 10.7 0.067 0.067 5.403 5.403 compute_QP_energies 1 7.0 0.000 0.000 5.301 5.301 compute_self_energy_cubic_gw 1 8.0 0.054 0.054 5.300 5.300 contract_cubic_gw 21 9.0 0.000 0.000 4.241 4.241 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.689 3.689 dbt_tas_reserve_blocks_index 3261 14.3 0.147 0.147 3.375 3.375 dbm_reserve_blocks 3628 15.3 3.291 3.291 3.291 3.291 scf_env_do_scf 1 3.0 0.000 0.000 2.670 2.670 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.670 2.670 dbt_reserve_blocks_index 2280 13.1 0.055 0.055 2.593 2.593 dbt_reserve_blocks_index_array 2222 12.2 0.008 0.008 2.548 2.548 convert_to_new_pgrid 2421 14.1 0.125 0.125 2.428 2.428 dbm_copy 1614 15.1 2.303 2.303 2.303 2.303 dbt_crop 1042 12.0 1.394 1.394 2.181 2.181 dbt_tas_reshape 367 15.0 0.006 0.006 2.113 2.113 dbt_tas_copy 574 11.4 1.287 1.287 2.096 2.096 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.092 2.092 compute_W_cubic_GW 10 7.0 0.010 0.010 2.045 2.045 dbt_reshape 278 11.9 0.964 0.964 1.826 1.826 get_2c_integrals 1 6.0 0.000 0.000 1.767 1.767 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.025 31.476 31.485 qs_energies 1 2.0 0.000 0.000 31.351 31.353 mp2_main 1 3.0 0.000 0.001 30.374 30.376 mp2_gpw_main 1 4.0 0.000 0.000 30.336 30.338 rpa_ri_compute_en 1 5.0 0.000 0.000 29.053 29.054 rpa_num_int 1 6.0 0.000 0.003 29.052 29.054 dbt_total 2336 9.6 0.011 0.012 25.745 25.749 compute_mat_P_omega 1 7.0 0.001 0.005 24.588 24.605 compute_mat_P_omega_contract 10 8.0 0.397 0.419 24.472 24.476 dbt_contract 787 11.0 0.026 0.027 19.405 19.408 dbt_tas_total 1149 12.2 0.050 0.058 17.452 17.453 dbt_tas_multiply 807 12.1 0.002 0.002 17.398 17.399 dbt_tas_dbm 807 14.1 0.003 0.003 13.101 13.136 dbm_multiply 807 16.1 10.091 11.058 10.091 11.058 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.302 7.303 compute_mat_P_omega_calc_M_occ 250 9.0 0.383 0.403 7.230 7.230 mp_sync 8706 11.6 4.747 6.226 4.747 6.226 dbt_tas_mm_2 251 15.0 0.001 0.001 6.098 6.099 dbt_copy 1111 10.7 0.011 0.012 5.532 5.818 dbt_reshape 1098 11.7 2.107 2.260 5.260 5.533 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.240 5.241 dbt_tas_mm_1N 524 15.1 0.001 0.001 4.712 5.173 compute_QP_energies 1 7.0 0.000 0.000 2.816 2.817 compute_self_energy_cubic_gw 1 8.0 0.002 0.003 2.815 2.816 dbt_communicate_buffer 1098 12.7 0.053 0.057 2.501 2.620 mp_waitall_2 3776 15.3 2.464 2.611 2.464 2.611 contract_cubic_gw 21 9.0 0.000 0.000 2.249 2.249 dbt_reserve_blocks_index 2849 13.1 0.070 0.075 1.531 1.813 dbt_reserve_blocks_index_array 2791 12.2 0.008 0.010 1.522 1.803 dbt_tas_reserve_blocks_index 3300 14.5 0.115 0.127 1.495 1.776 dbm_reserve_blocks 3696 15.4 1.467 1.745 1.467 1.745 dbt_crop 1042 12.0 0.870 0.955 1.344 1.508 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.282 1.283 dbt_tas_replicate 396 14.1 0.526 0.690 1.076 1.149 parallel_gemm_fm 105 8.4 0.000 0.000 0.991 0.997 parallel_gemm_fm_cosma 105 9.4 0.991 0.997 0.991 0.997 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 0.943 0.946 scf_env_do_scf 1 3.0 0.000 0.000 0.938 0.938 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 0.938 0.938 convert_to_new_pgrid 2421 14.1 0.024 0.030 0.773 0.913 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 0.884 0.887 dbm_copy 1608 15.1 0.744 0.880 0.744 0.880 mp_max_i 1994 9.8 0.543 0.763 0.543 0.763 compute_W_cubic_GW 10 7.0 0.001 0.001 0.736 0.742 dbm_add 807 14.1 0.541 0.643 0.541 0.643 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=22.180000000000007, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=33.36, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.857, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.843, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.291, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.303, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=0.964, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=9.076, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.091, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.383, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.397, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.467, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.744, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.107, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.464, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=4.747, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.019 0.019 263.739 263.739 qs_forces 1 2.0 0.000 0.000 263.166 263.166 rebuild_ks_matrix 7 6.6 0.000 0.000 261.728 261.728 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 261.728 261.728 hfx_ks_matrix 7 8.6 0.000 0.000 259.864 259.864 hfx_ri_update_ks 7 9.6 0.000 0.000 224.195 224.195 hfx_ri_update_ks_Pmat 7 10.6 31.545 31.545 224.191 224.191 dbt_total 783 11.1 0.005 0.005 205.406 205.406 qs_energies 1 3.0 0.000 0.000 198.400 198.400 scf_env_do_scf 1 4.0 0.000 0.000 198.121 198.121 qs_ks_update_qs_env 8 6.0 0.000 0.000 197.011 197.011 dbt_contract 207 12.4 0.056 0.056 187.538 187.538 dbt_tas_total 317 14.0 1.314 1.314 185.921 185.921 dbt_tas_multiply 216 13.5 0.001 0.001 182.529 182.529 dbt_tas_dbm 216 15.5 0.001 0.001 174.462 174.462 dbm_multiply 216 17.5 174.459 174.459 174.459 174.459 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 169.147 169.147 dbt_tas_mm_2 91 16.5 0.001 0.001 160.536 160.536 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 124.553 124.553 init_scf_loop 2 5.0 0.000 0.000 73.566 73.566 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 64.720 64.720 hfx_ri_update_forces 1 7.0 1.649 1.649 35.666 35.666 hfx_ri_forces_Pmat_3c 1 8.0 4.841 4.841 18.747 18.747 dbt_copy 409 11.7 0.048 0.048 14.469 14.469 precalc_derivatives 1 8.0 2.243 2.243 13.108 13.108 dbt_reshape 132 13.2 6.000 6.000 10.035 10.035 dbt_tas_mm_3T 77 17.1 0.000 0.000 9.713 9.713 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 9.107 9.107 build_3c_derivatives 3 9.0 2.649 2.649 7.305 7.305 dbt_tas_reserve_blocks_index 1229 15.4 0.282 0.282 7.050 7.050 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 6.914 6.914 dbm_reserve_blocks 1345 16.4 6.864 6.864 6.864 6.864 dbt_reserve_blocks_index 818 14.4 0.102 0.102 5.586 5.586 dbt_reserve_blocks_index_array 795 13.4 0.007 0.007 5.505 5.505 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.029 42.436 42.445 qs_forces 1 2.0 0.000 0.000 42.246 42.246 rebuild_ks_matrix 7 6.6 0.000 0.000 41.553 41.554 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 41.553 41.554 hfx_ks_matrix 7 8.6 0.000 0.000 40.535 40.541 dbt_total 783 11.1 0.005 0.006 35.751 35.758 dbt_contract 207 12.4 0.021 0.022 28.118 28.135 dbt_tas_total 317 14.0 0.032 0.053 25.593 25.596 dbt_tas_multiply 216 13.5 0.001 0.001 25.054 25.055 hfx_ri_update_ks 7 9.6 0.000 0.000 24.685 24.685 hfx_ri_update_ks_Pmat 7 10.6 1.210 1.295 24.684 24.685 qs_energies 1 3.0 0.000 0.000 23.343 23.343 scf_env_do_scf 1 4.0 0.000 0.001 23.197 23.197 qs_ks_update_qs_env 8 6.0 0.000 0.000 22.661 22.662 dbt_tas_dbm 216 15.5 0.001 0.001 21.099 21.109 dbm_multiply 216 17.5 18.514 19.910 18.514 19.910 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 18.893 18.893 hfx_ri_update_forces 1 7.0 0.056 0.059 15.849 15.856 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 12.981 12.981 hfx_ri_update_ks_Pmat_KS 63 11.6 0.000 0.001 11.722 11.723 hfx_ri_forces_Pmat_3c 1 8.0 0.166 0.183 11.413 11.413 init_scf_loop 2 5.0 0.000 0.000 10.216 10.216 dbt_tas_mm_2 91 16.5 0.001 0.001 9.885 9.893 mp_sync 2677 13.0 4.594 7.898 4.594 7.898 dbt_copy 421 11.8 0.009 0.011 6.547 6.883 dbt_tas_mm_3T 77 17.1 0.000 0.000 5.484 5.968 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 4.821 4.822 dbt_reshape 252 12.8 2.254 2.332 4.497 4.676 dbt_tas_mm_3N 37 15.4 0.000 0.000 3.977 4.160 precalc_derivatives 1 8.0 0.079 0.086 3.403 3.403 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.086 3.086 dbt_tas_reserve_blocks_index 1251 15.5 0.217 0.228 2.500 2.890 dbm_reserve_blocks 1375 16.5 2.367 2.746 2.367 2.746 dbt_crop 372 13.7 1.734 1.830 2.348 2.546 dbt_reserve_blocks_index 938 14.4 0.104 0.113 1.978 2.269 dbt_reserve_blocks_index_array 915 13.4 0.004 0.006 1.955 2.246 build_3c_derivatives 3 9.0 0.223 0.240 1.914 1.919 mp_waitall_2 1000 16.4 1.714 1.811 1.714 1.811 dbt_communicate_buffer 252 13.8 0.011 0.013 1.509 1.589 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.569 1.572 dbt_tas_copy 169 12.8 0.782 0.827 1.400 1.562 convert_to_new_pgrid 648 15.5 0.033 0.062 1.274 1.439 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.414 1.415 dbm_copy 452 16.3 1.102 1.275 1.102 1.275 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=40.029999999999944, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=174.459, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=31.545, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=6.864, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=6.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.841, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_crop", label="dbt_crop", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=11.596999999999994, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=18.514, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.21, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.367, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.254, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.166, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=4.594, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_crop", label="dbt_crop", y=1.734, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 174.709 174.709 qs_energies 1 2.0 0.000 0.000 174.521 174.521 mp2_main 1 3.0 0.000 0.000 169.745 169.745 mp2_gpw_main 1 4.0 0.001 0.001 169.315 169.315 mp2_ri_gpw_compute_in 1 5.0 0.388 0.388 123.716 123.716 mp2_ri_gpw_compute_in_loop 1 6.0 0.012 0.012 111.891 111.891 mp2_eri_3c_integrate_gpw 2656 7.0 0.015 0.015 80.994 80.994 integrate_v_rspace 2666 8.0 0.610 0.610 67.727 67.727 grid_integrate_task_list 2666 9.0 65.052 65.052 65.052 65.052 mp2_ri_gpw_compute_en 1 5.0 0.087 0.087 45.575 45.575 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.987 9.987 43.721 43.721 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.141 2.141 26.359 26.359 local_gemm 2080 8.0 24.218 24.218 24.218 24.218 dbcsr_multiply_generic 5322 8.0 0.183 0.183 21.222 21.222 ao_to_mo_and_store_B_mult_1 2656 7.0 0.011 0.011 21.201 21.201 pw_transfer 63872 10.6 1.063 1.063 12.169 12.169 calculate_wavefunction 2656 8.0 8.139 8.139 11.904 11.904 get_2c_integrals 1 6.0 0.000 0.000 11.435 11.435 multiply_cannon 5322 9.0 0.471 0.471 10.885 10.885 fft_wrap_pw1pw2 53228 11.4 0.114 0.114 10.870 10.870 compute_2c_integrals 1 7.0 0.008 0.008 10.448 10.448 compute_2c_integrals_loop_lm 1 8.0 0.007 0.007 10.419 10.419 mp2_eri_2c_integrate_gpw 1 9.0 3.326 3.326 10.412 10.412 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.620 2.620 9.584 9.584 multiply_cannon_loop 5322 10.0 0.154 0.154 9.477 9.477 make_m2s 10644 9.0 0.066 0.066 8.072 8.072 multiply_cannon_multrec 5322 11.0 7.884 7.884 7.924 7.924 make_images 10644 10.0 3.166 3.166 7.751 7.751 fft_wrap_pw1pw2_20 21271 12.4 0.485 0.485 7.622 7.622 copy_dbcsr_to_fm 2679 8.0 0.030 0.030 7.488 7.488 fft3d_s 53229 13.4 6.682 6.682 6.716 6.716 dbcsr_complete_redistribute 2689 9.0 1.166 1.166 5.934 5.934 mp2_ri_gpw_compute_en_ener 2080 7.0 5.436 5.436 5.436 5.436 dbcsr_finalize 10708 9.5 0.195 0.195 5.360 5.360 dbcsr_merge_all 8011 10.3 3.493 3.493 4.680 4.680 scf_env_do_scf 1 3.0 0.000 0.000 4.383 4.383 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.383 4.383 potential_pw2rs 5322 10.0 0.150 0.150 4.073 4.073 collocate_single_gaussian 2656 10.0 0.134 0.134 3.527 3.527 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.026 34.833 34.844 qs_energies 1 2.0 0.000 0.000 34.603 34.603 mp2_main 1 3.0 0.000 0.001 32.536 32.536 mp2_gpw_main 1 4.0 0.001 0.001 32.427 32.427 mp2_ri_gpw_compute_in 1 5.0 0.053 0.053 17.185 17.424 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 15.991 16.233 mp2_ri_gpw_compute_en 1 5.0 0.159 0.170 15.153 15.351 mp2_ri_gpw_compute_en_RI_loop 1 6.0 0.815 0.975 14.261 14.266 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 13.885 14.085 integrate_v_rspace 93 8.1 0.103 0.115 13.764 13.964 grid_integrate_task_list 93 9.1 13.451 13.646 13.451 13.646 mp2_ri_gpw_compute_en_expansio 65 7.0 0.099 0.135 10.719 10.861 local_gemm 65 8.0 10.620 10.736 10.620 10.736 mp2_ri_gpw_compute_en_comm 17 7.0 0.064 0.091 2.388 2.864 mp_sendrecv_dm3 1054 8.0 1.880 2.492 1.880 2.492 dbcsr_multiply_generic 176 8.0 0.008 0.009 1.816 2.015 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.798 2.000 scf_env_do_scf 1 3.0 0.000 0.000 1.948 1.949 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 1.948 1.949 get_2c_integrals 1 6.0 0.000 0.000 1.125 1.157 multiply_cannon 176 9.0 0.016 0.018 1.096 1.154 multiply_cannon_loop 176 10.0 0.002 0.002 1.037 1.093 qs_scf_new_mos 10 5.0 0.000 0.000 0.948 0.958 multiply_cannon_multrec 246 11.0 0.897 0.931 0.902 0.937 eigensolver 11 5.8 0.001 0.001 0.917 0.918 compute_2c_integrals 1 7.0 0.002 0.003 0.832 0.848 make_m2s 352 9.0 0.003 0.003 0.683 0.821 make_images 352 10.0 0.051 0.053 0.670 0.808 pw_transfer 2120 10.5 0.045 0.047 0.763 0.772 compute_2c_integrals_loop_lm 1 8.0 0.002 0.004 0.738 0.766 mp2_eri_2c_integrate_gpw 1 9.0 0.199 0.212 0.736 0.765 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.757 0.757 cp_fm_redistribute_end 11 7.8 0.282 0.745 0.299 0.751 cp_fm_diag_elpa_base 11 7.8 0.437 0.703 0.449 0.720 fft_wrap_pw1pw2 1768 11.4 0.004 0.005 0.704 0.711 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=59.429, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=65.052, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=24.218, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.987, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=8.139, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.884, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=7.170000000000002, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.451, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=10.62, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=0.815, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.897, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.88, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.084 0.084 123.824 123.824 qs_energies 1 2.0 0.000 0.000 122.359 122.359 scf_env_do_scf 1 3.0 0.000 0.000 115.662 115.662 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 115.662 115.662 qs_ks_update_qs_env 15 5.0 0.000 0.000 48.804 48.804 rebuild_ks_matrix 15 6.0 0.000 0.000 48.594 48.594 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 48.594 48.594 qs_scf_new_mos 15 5.0 0.000 0.000 41.916 41.916 eigensolver 15 6.0 0.002 0.002 34.125 34.125 qs_vxc_create 15 8.0 0.039 0.039 33.439 33.439 calculate_dispersion_nonloc 15 9.0 7.051 7.051 29.121 29.121 pw_transfer 1191 10.0 0.060 0.060 22.741 22.741 fft_wrap_pw1pw2 1086 11.0 0.008 0.008 22.564 22.564 qs_rho_update_rho_low 16 5.0 0.000 0.000 22.076 22.076 calculate_rho_elec 16 6.0 0.216 0.216 22.076 22.076 cp_fm_diag_elpa 15 7.0 0.000 0.000 21.561 21.561 cp_fm_diag_elpa_base 15 8.0 19.073 19.073 21.561 21.561 grid_collocate_task_list 16 7.0 20.720 20.720 20.720 20.720 fft_wrap_pw1pw2_150 765 12.0 3.568 3.568 16.389 16.389 sum_up_and_integrate 15 8.0 0.144 0.144 14.001 14.001 integrate_v_rspace 15 9.0 0.019 0.019 13.857 13.857 grid_integrate_task_list 15 10.0 13.334 13.334 13.334 13.334 cp_fm_cholesky_restore 45 7.0 10.365 10.365 10.365 10.365 fft3d_s 1087 13.0 9.868 9.868 9.876 9.876 pw_scatter_s 585 13.1 7.220 7.220 7.220 7.220 fft_wrap_pw1pw2_200 197 12.3 0.777 0.777 5.986 5.986 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.580 5.580 dbcsr_complete_redistribute 46 8.3 2.268 2.268 5.575 5.575 cp_fm_upper_to_full 30 8.0 4.685 4.685 4.685 4.685 vdW_energy 15 10.0 4.300 4.300 4.300 4.300 xc_vxc_pw_create 15 9.0 0.228 0.228 4.279 4.279 gspace_mixing 14 5.0 0.170 0.170 4.102 4.102 broyden_mixing 14 6.0 3.465 3.465 3.465 3.465 init_scf_run 1 3.0 0.000 0.000 3.191 3.191 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.031 3.031 xc_pw_derive 90 11.0 0.001 0.001 2.791 2.791 calculate_dm_sparse 15 6.0 0.018 0.018 2.561 2.561 cp_dbcsr_plus_fm_fm_t_native 15 7.0 0.000 0.000 2.480 2.480 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.028 62.679 62.690 qs_energies 1 2.0 0.000 0.000 62.417 62.423 scf_env_do_scf 1 3.0 0.000 0.000 58.360 58.361 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 58.360 58.361 qs_ks_update_qs_env 15 5.0 0.000 0.000 25.217 25.225 rebuild_ks_matrix 15 6.0 0.000 0.000 25.183 25.191 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.003 25.183 25.191 qs_rho_update_rho_low 16 5.0 0.000 0.000 21.052 21.057 calculate_rho_elec 16 6.0 0.007 0.007 21.052 21.056 grid_collocate_task_list 16 7.0 19.579 19.817 19.579 19.817 sum_up_and_integrate 15 8.0 0.013 0.018 14.255 14.305 integrate_v_rspace 15 9.0 0.000 0.001 14.242 14.299 grid_integrate_task_list 15 10.0 13.137 13.325 13.137 13.325 qs_scf_new_mos 15 5.0 0.000 0.000 12.721 12.753 eigensolver 15 6.0 0.001 0.002 11.786 11.829 qs_vxc_create 15 8.0 0.001 0.001 10.601 10.613 pw_transfer 1191 10.0 0.093 0.107 8.526 8.609 calculate_dispersion_nonloc 15 9.0 0.906 0.929 8.557 8.579 fft_wrap_pw1pw2 1086 11.0 0.012 0.015 8.347 8.452 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.245 8.248 cp_fm_diag_elpa_base 15 8.0 8.099 8.130 8.241 8.241 fft3d_ps 1086 13.0 2.645 3.047 6.718 6.990 fft_wrap_pw1pw2_150 765 12.0 0.261 0.311 5.497 5.535 mp_alltoall_z22v 1086 15.0 2.523 3.681 2.523 3.681 cp_fm_cholesky_restore 45 7.0 3.397 3.430 3.397 3.430 fft_wrap_pw1pw2_200 197 12.3 0.177 0.220 2.723 2.793 x_to_yz 585 14.1 0.891 1.068 2.143 2.531 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.445 2.445 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.135 2.345 yz_to_x 501 13.9 0.633 0.839 1.904 2.293 xc_vxc_pw_create 15 9.0 0.015 0.020 2.044 2.072 rs_pw_transfer 158 9.4 0.001 0.002 1.201 1.494 density_rs2pw 16 7.0 0.001 0.001 1.278 1.490 xc_pw_derive 90 11.0 0.001 0.002 1.403 1.474 init_scf_run 1 3.0 0.000 0.001 1.387 1.387 build_core_ppnl 1 5.0 1.247 1.379 1.247 1.379 vdW_energy 15 10.0 1.277 1.345 1.277 1.345 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.297 1.298 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=50.464, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.72, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=19.073, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.334, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.365, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.868, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=15.822000000000003, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.579, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.099, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.137, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.397, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.645, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.078 0.078 326.537 326.537 qs_energies 1 2.0 0.000 0.000 326.399 326.399 ls_scf 1 3.0 0.000 0.000 325.156 325.156 ls_scf_main 1 4.0 0.002 0.002 316.257 316.257 density_matrix_trs4 11 5.0 0.012 0.012 224.852 224.852 arnoldi_extremal 12 6.1 0.000 0.000 141.345 141.345 arnoldi_normal_ev 12 7.1 0.031 0.031 141.345 141.345 build_subspace 23 8.1 0.084 0.084 138.876 138.876 dbcsr_matrix_vector_mult 652 9.0 0.202 0.202 138.808 138.808 dbcsr_matrix_vector_mult_local 652 10.0 137.297 137.297 137.307 137.307 ls_scf_dm_to_ks 11 5.0 0.000 0.000 86.090 86.090 matrix_ls_to_qs 11 6.0 0.000 0.000 82.856 82.856 dbcsr_multiply_generic 185 6.1 0.826 0.826 72.586 72.586 dbcsr_copy_into_existing 11 7.0 44.534 44.534 44.534 44.534 multiply_cannon 185 7.1 0.281 0.281 43.727 43.727 dbcsr_complete_redistribute 23 7.5 30.869 30.869 42.109 42.109 matrix_decluster 11 7.0 0.000 0.000 38.320 38.320 multiply_cannon_loop 185 8.1 0.205 0.205 31.626 31.626 make_m2s 370 7.1 0.037 0.037 24.555 24.555 make_images 370 8.1 10.601 10.601 22.987 22.987 multiply_cannon_multrec 185 9.1 22.646 22.646 22.672 22.672 dbcsr_finalize 646 7.5 0.160 0.160 14.634 14.634 dbcsr_merge_all 597 8.5 2.087 2.087 13.570 13.570 setup_rec_index_2d 370 8.1 11.733 11.733 11.733 11.733 tree_to_linear_d 110 9.4 10.165 10.165 10.165 10.165 dbcsr_sort_indices 1103 9.9 10.127 10.127 10.127 10.127 quick_finalize 395 10.0 0.384 0.384 8.768 8.768 calculate_norms 370 9.1 8.748 8.748 8.748 8.748 ls_scf_init_scf 1 4.0 0.000 0.000 8.208 8.208 dbcsr_special_finalize 370 9.1 0.002 0.002 8.099 8.099 ls_scf_init_matrix_S 1 5.0 0.000 0.000 7.878 7.878 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.230 7.230 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.021 65.657 65.667 qs_energies 1 2.0 0.000 0.000 65.551 65.552 ls_scf 1 3.0 0.000 0.000 65.503 65.503 ls_scf_main 1 4.0 0.001 0.009 62.952 62.954 density_matrix_trs4 11 5.0 0.006 0.019 60.510 60.577 dbcsr_multiply_generic 185 6.1 0.058 0.077 57.204 57.496 multiply_cannon 185 7.1 0.034 0.036 47.772 48.343 multiply_cannon_loop 185 8.1 0.120 0.135 45.365 46.276 multiply_cannon_multrec 1480 9.1 27.977 29.857 28.251 30.128 mp_waitall_1 11936 10.3 14.557 16.186 14.557 16.186 multiply_cannon_metrocomm3 1480 9.1 0.014 0.018 8.679 11.603 make_m2s 370 7.1 0.034 0.038 6.594 6.664 make_images 370 8.1 0.622 0.656 6.464 6.535 calculate_norms 2960 9.1 5.027 5.758 5.027 5.758 multiply_cannon_metrocomm1 1480 9.1 0.008 0.012 3.230 4.902 make_images_data 370 9.1 0.009 0.011 2.892 3.096 hybrid_alltoall_any 393 9.9 0.188 0.967 2.528 2.721 arnoldi_extremal 12 6.1 0.000 0.000 2.465 2.479 arnoldi_normal_ev 12 7.1 0.002 0.008 2.464 2.478 build_subspace 23 8.1 0.020 0.025 2.375 2.377 mp_sum_l 1119 5.6 1.747 2.295 1.747 2.295 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.115 2.189 dbcsr_matrix_vector_mult 652 9.0 0.013 0.058 1.965 2.052 dbcsr_complete_redistribute 23 7.5 1.185 1.281 1.905 1.995 ls_scf_init_scf 1 4.0 0.000 0.000 1.958 1.958 matrix_ls_to_qs 11 6.0 0.000 0.000 1.861 1.954 ls_scf_init_matrix_S 1 5.0 0.000 0.000 1.933 1.940 make_images_pack 370 9.1 1.643 1.864 1.647 1.867 matrix_decluster 11 7.0 0.000 0.000 1.720 1.814 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.767 1.771 dbcsr_matrix_vector_mult_local 652 10.0 1.602 1.665 1.604 1.667 dbcsr_finalize 646 7.5 0.008 0.008 1.445 1.608 buffer_matrices_ensure_size 370 8.1 1.328 1.434 1.328 1.434 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 0.907 1.379 dbcsr_data_release 12861 10.1 0.960 1.347 0.960 1.347 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=70.70999999999998, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=137.297, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=44.534, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=30.869, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=22.646, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=11.733, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=8.748, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=11.918999999999997, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.602, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.185, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=27.977, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.027, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=14.557, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=1.747, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.643, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 70.818 70.818 lib_test 1 2.0 0.000 0.000 70.810 70.810 dbcsr_run_tests 3 3.0 0.002 0.002 70.810 70.810 test_multiplies_multiproc 3 4.0 0.001 0.001 54.630 54.630 dbcsr_redistribute 9 5.0 35.555 35.555 37.128 37.128 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.169 16.169 dbcsr_make_random_matrix 9 4.0 13.005 13.005 16.072 16.072 multiply_cannon 9 6.0 0.001 0.001 11.501 11.501 multiply_cannon_loop 9 7.0 0.014 0.014 11.145 11.145 multiply_cannon_multrec 9 8.0 11.131 11.131 11.131 11.131 dbcsr_finalize 27 5.7 0.003 0.003 5.591 5.591 dbcsr_merge_all 18 6.5 1.979 1.979 4.857 4.857 dbcsr_data_release 975 7.6 2.797 2.797 2.797 2.797 tree_to_linear_d 9 7.0 1.890 1.890 1.890 1.890 make_m2s 18 6.0 0.001 0.001 1.578 1.578 make_images 18 7.0 0.549 0.549 1.531 1.531 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.010 17.669 17.674 lib_test 1 2.0 0.000 0.000 17.642 17.659 dbcsr_run_tests 3 3.0 0.000 0.001 17.641 17.659 test_multiplies_multiproc 3 4.0 0.000 0.003 16.802 16.842 dbcsr_multiply_generic 9 5.0 0.001 0.001 15.409 15.504 multiply_cannon 9 6.0 0.002 0.002 13.693 13.971 multiply_cannon_loop 9 7.0 0.002 0.002 13.406 13.722 multiply_cannon_multrec 72 8.0 11.278 11.646 11.279 11.647 mp_waitall_1 576 9.2 2.439 2.922 2.439 2.922 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 1.846 2.462 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.274 0.975 dbcsr_data_release 444 7.6 0.744 0.852 0.744 0.852 dbcsr_make_random_matrix 9 4.0 0.662 0.669 0.809 0.832 mp_sum_l 390 2.5 0.391 0.811 0.391 0.811 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.382 0.802 dbcsr_finalize 27 5.7 0.000 0.000 0.696 0.787 dbcsr_destroy 111 5.9 0.000 0.000 0.596 0.735 make_m2s 18 6.0 0.001 0.001 0.669 0.710 make_images 18 7.0 0.021 0.021 0.666 0.706 dbcsr_merge_all 18 6.5 0.100 0.124 0.546 0.613 dbcsr_redistribute 9 5.0 0.236 0.273 0.479 0.506 make_images_data 18 8.0 0.001 0.001 0.350 0.421 dbcsr_data_copy_aa2 18 7.5 0.327 0.378 0.327 0.378 hybrid_alltoall_any 18 9.0 0.027 0.134 0.308 0.360 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.350999999999999, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=35.555, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=13.005, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.131, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.797, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=1.979, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=1.8190000000000008, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.236, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.662, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.278, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.744, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.1, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.439, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.391, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.056 0.056 133.612 133.612 qs_mol_dyn_low 1 2.0 0.003 0.003 132.149 132.149 velocity_verlet 5 3.0 0.003 0.003 107.898 107.898 qmmm_el_coupling 6 3.8 0.000 0.000 88.612 88.612 qmmm_elec_with_gaussian 6 4.8 0.012 0.012 88.608 88.608 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 87.976 87.976 qmmm_elec_gaussian_low_G 6 6.8 87.047 87.047 87.047 87.047 qs_forces 6 3.8 0.000 0.000 34.956 34.956 qs_energies 6 4.8 0.000 0.000 30.985 30.985 scf_env_do_scf 6 5.8 0.001 0.001 28.681 28.681 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 25.041 25.041 rebuild_ks_matrix 45 8.4 0.000 0.000 24.135 24.135 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 24.135 24.135 qs_ks_update_qs_env 45 7.8 0.000 0.000 20.595 20.595 pw_transfer 966 12.3 0.052 0.052 17.246 17.246 fft_wrap_pw1pw2 801 13.6 0.006 0.006 17.034 17.034 fft_wrap_pw1pw2_150 507 15.2 2.263 2.263 16.624 16.624 qs_vxc_create 45 10.4 0.001 0.001 13.277 13.277 xc_vxc_pw_create 45 11.4 0.645 0.645 13.277 13.277 xc_pw_derive 270 13.4 0.002 0.002 9.322 9.322 fft3d_s 802 15.6 7.822 7.822 7.831 7.831 qs_rho_update_rho_low 45 7.9 0.000 0.000 7.194 7.194 calculate_rho_elec 45 8.9 0.564 0.564 7.194 7.194 xc_rho_set_and_dset_create 45 12.4 0.629 0.629 6.772 6.772 xc_pw_divergence 45 12.4 0.001 0.001 5.804 5.804 pw_scatter_s 429 15.8 5.650 5.650 5.650 5.650 qmmm_forces 6 3.8 0.001 0.001 5.225 5.225 qmmm_forces_with_gaussian 6 4.8 0.017 0.017 4.879 4.879 pw_integral_ab 2539 7.4 4.462 4.462 4.462 4.462 qs_ks_ddapc 45 10.4 0.001 0.001 4.173 4.173 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.156 4.156 init_scf_loop 6 6.8 0.000 0.000 3.634 3.634 sum_up_and_integrate 45 10.4 0.506 0.506 3.591 3.591 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.548 3.548 qmmm_forces_gaussian_low_G 6 6.8 3.460 3.460 3.460 3.460 grid_collocate_task_list 45 9.9 3.345 3.345 3.345 3.345 density_rs2pw 45 9.9 0.002 0.002 3.284 3.284 integrate_v_rspace 45 11.4 0.006 0.006 3.085 3.085 fist_calc_energy_force 6 3.8 0.001 0.001 2.727 2.727 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.038 0.054 58.096 58.107 qs_mol_dyn_low 1 2.0 0.003 0.003 57.020 57.078 qs_forces 6 3.8 0.001 0.001 42.589 42.590 qs_energies 6 4.8 0.000 0.000 40.663 40.663 scf_env_do_scf 6 5.8 0.000 0.001 39.687 39.688 scf_env_do_scf_inner_loop 113 6.2 0.002 0.019 38.094 38.095 rebuild_ks_matrix 119 8.1 0.000 0.000 28.496 28.505 qs_ks_build_kohn_sham_matrix 119 9.1 0.015 0.016 28.496 28.505 qs_ks_update_qs_env 119 7.3 0.001 0.001 26.853 26.861 velocity_verlet 5 3.0 0.002 0.004 23.418 23.421 pw_transfer 2446 12.3 0.213 0.227 20.880 21.420 fft_wrap_pw1pw2 2059 13.4 0.024 0.027 20.423 20.954 fft_wrap_pw1pw2_150 1321 14.9 1.300 1.410 19.716 20.186 fft3d_ps 2059 15.4 7.303 8.019 16.276 16.912 qs_vxc_create 119 10.1 0.002 0.003 15.628 15.630 xc_vxc_pw_create 119 11.1 0.148 0.203 15.625 15.628 xc_pw_derive 714 13.1 0.010 0.011 12.025 12.291 qs_rho_update_rho_low 119 7.3 0.000 0.001 11.213 11.215 calculate_rho_elec 119 8.3 0.049 0.054 11.213 11.215 sum_up_and_integrate 119 10.1 0.083 0.096 9.301 9.317 integrate_v_rspace 119 11.1 0.003 0.003 9.218 9.237 xc_pw_divergence 119 12.1 0.005 0.005 7.909 8.133 xc_rho_set_and_dset_create 119 12.1 0.358 0.430 7.379 7.511 qmmm_forces 6 3.8 0.002 0.002 7.370 7.371 qmmm_forces_with_gaussian 6 4.8 0.008 0.012 7.088 7.206 density_rs2pw 119 9.3 0.006 0.007 6.648 6.857 rs_pw_transfer 988 11.5 0.011 0.013 6.228 6.462 qmmm_el_coupling 6 3.8 0.000 0.000 6.207 6.295 qmmm_elec_with_gaussian 6 4.8 0.003 0.003 6.205 6.293 potential_pw2rs 119 12.1 0.006 0.007 5.543 5.556 mp_alltoall_z22v 2059 17.4 4.635 5.527 4.635 5.527 x_to_yz 1095 16.8 2.449 2.733 4.742 5.013 grid_collocate_task_list 119 9.3 4.406 4.592 4.406 4.592 yz_to_x 964 16.0 1.824 2.093 4.166 4.486 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.845 3.945 grid_integrate_task_list 119 12.1 3.416 3.590 3.416 3.590 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.330 3.438 qmmm_forces_gaussian_low_G 6 6.8 3.165 3.261 3.165 3.261 qmmm_elec_gaussian_low_G 6 6.8 2.742 2.844 2.742 2.844 pw_restrict_s3 18 5.8 1.411 1.447 2.625 2.705 mp_waitany 4028 12.8 2.280 2.554 2.280 2.554 rs_pw_transfer_PW2RS_150 125 13.9 0.744 0.800 2.335 2.367 qs_ks_ddapc 119 10.1 0.002 0.002 2.239 2.301 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.152 2.238 pw_prolongate_s3 18 6.8 1.158 1.200 2.152 2.238 rs_pw_transfer_RS2PW_150 125 11.2 0.587 0.672 1.805 2.044 qs_scf_new_mos 113 7.2 0.000 0.000 1.990 1.998 qs_scf_loop_do_ot 113 8.2 0.000 0.001 1.989 1.998 ot_scf_mini 113 9.2 0.001 0.001 1.905 1.909 dbcsr_multiply_generic 2588 12.3 0.057 0.058 1.800 1.840 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.652 1.653 init_scf_loop 6 6.8 0.000 0.000 1.590 1.590 pw_integral_ab 2761 7.7 1.250 1.308 1.496 1.571 pw_scatter_p 1095 15.8 1.535 1.562 1.535 1.562 pw_gather_p 964 15.0 1.258 1.418 1.258 1.418 mp_sum_dm3 33 5.7 1.229 1.282 1.229 1.282 mp_waitall_1 177795 16.4 1.141 1.230 1.141 1.230 ot_mini 113 10.2 0.000 0.001 1.176 1.182 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=21.825999999999993, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=87.047, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.822, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.65, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.462, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.46, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.345, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=31.179, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.742, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.25, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.165, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.406, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.303, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.635, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.416, yerr=0.0 Summary: Performance test took 33 minutes. Status: OK Removing intermediate container 8c86fd98b9f3 ---> 5687c39d0826 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 81321b1c8f1d Removing intermediate container 81321b1c8f1d ---> ae24f7754f0b Step 42/42 : ENTRYPOINT [] ---> Running in e5e2284fa81b Removing intermediate container e5e2284fa81b ---> f557dd995f25 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built f557dd995f25 Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2023-01-26 19:56:20+00:00