StartDate: 2023-06-24 20:40:30+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: a60e880d3604a544f9b48a09c0285b22a1c88942 CommitTime: 2023-06-24 15:00:58 +0200 CommitAuthor: Ole Schütt CommitSubject: Add initializers to qs_oce_types and sap_kind_types #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=a60e880d3604a544f9b48a09c0285b22a1c88942 Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 371.1MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 6b851dcae6ca: Pulling fs layer 6b851dcae6ca: Verifying Checksum 6b851dcae6ca: Download complete 6b851dcae6ca: Pull complete Digest: sha256:6120be6a2b7ce665d0cbddc3ce6eae60fe94637c6a66985312d1f02f63cc0bcd Status: Downloaded newer image for ubuntu:22.04 ---> 99284ca6cea0 Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> a4b0c172f62f Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 6bb3b047e99c Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 73d80ef9e4a3 Step 5/42 : RUN mkdir scripts ---> Using cache ---> b7df9fba15d6 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 5c15e99590c4 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> d49e3391ce09 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> fda0d3a90bfc Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 11c1a44a6047 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> c0966e1e9481 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 2ca131548388 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> e81a3481b9f8 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> e4cc5157ace6 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> fce6a9f1c0f0 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 20082ea74ce0 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 1ed0f8ec5ae4 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 0d45c2699d8d Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 786587ed7b92 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> b117d04fe9c9 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 2089654a5431 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> a2413d2964a0 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> f37288abb7ef Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> bad1ddb6076e Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 1281072d1860 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> dd1a8cf997d1 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 651a0762955e Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 8754272b0a19 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> adb5af6166e4 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> e41b12ebf40c Step 30/42 : COPY ./Makefile . ---> Using cache ---> f6260b8939fb Step 31/42 : COPY ./src ./src ---> Using cache ---> 13de02dd12c9 Step 32/42 : COPY ./exts ./exts ---> Using cache ---> 288daa72eccd Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> 181811352618 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && rm -rf lib obj && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in fa027d6d48eb './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container fa027d6d48eb ---> a69d64130f42 Step 35/42 : COPY ./data ./data ---> 55b446afd8e1 Step 36/42 : COPY ./tests ./tests ---> 5ffdf17fdb58 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> dd762f5b4b56 Step 38/42 : COPY ./benchmarks ./benchmarks ---> 27b9ea3ed90b Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 43f20ce0da83 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 7f49a6d4705d ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 70 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.029 0.029 89.374 89.374 qs_mol_dyn_low 1 2.0 0.003 0.003 88.713 88.713 qs_forces 11 3.9 0.001 0.001 88.674 88.674 qs_energies 11 4.9 0.001 0.001 82.582 82.582 scf_env_do_scf 11 5.9 0.001 0.001 71.646 71.646 velocity_verlet 10 3.0 0.002 0.002 57.243 57.243 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 54.990 54.990 rebuild_ks_matrix 119 8.3 0.001 0.001 20.834 20.834 qs_ks_build_kohn_sham_matrix 119 9.3 0.013 0.013 20.833 20.833 qs_scf_new_mos 108 7.5 0.001 0.001 20.060 20.060 qs_scf_loop_do_ot 108 8.5 0.001 0.001 20.059 20.059 dbcsr_multiply_generic 2286 12.5 0.170 0.170 19.903 19.903 qs_rho_update_rho_low 119 7.7 0.001 0.001 19.448 19.448 calculate_rho_elec 119 8.7 0.952 0.952 19.447 19.447 qs_ks_update_qs_env 119 7.6 0.001 0.001 19.216 19.216 ot_scf_mini 108 9.5 0.002 0.002 18.639 18.639 init_scf_loop 11 6.9 0.000 0.000 16.538 16.538 grid_collocate_task_list 119 9.7 14.899 14.899 14.899 14.899 prepare_preconditioner 11 7.9 0.000 0.000 14.106 14.106 make_preconditioner 11 8.9 0.000 0.000 14.106 14.106 sum_up_and_integrate 119 10.3 0.802 0.802 13.391 13.391 make_full_inverse_cholesky 11 9.9 0.025 0.025 12.952 12.952 integrate_v_rspace 119 11.3 0.092 0.092 12.588 12.588 ot_mini 108 10.5 0.001 0.001 11.794 11.794 make_m2s 4572 13.5 0.047 0.047 10.911 10.911 grid_integrate_task_list 119 12.3 10.613 10.613 10.613 10.613 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.577 6.577 pw_transfer 1439 11.6 0.069 0.069 6.315 6.315 qs_ot_get_derivative 108 11.5 0.001 0.001 6.146 6.146 fft_wrap_pw1pw2 1201 12.6 0.007 0.007 6.081 6.081 dbcsr_make_dense_low 5837 15.5 0.069 0.069 5.856 5.856 make_dense_data 5837 16.5 5.249 5.249 5.773 5.773 ot_diis_step 108 11.5 0.004 0.004 5.645 5.645 make_images 4572 14.5 2.141 2.141 5.444 5.444 fft_wrap_pw1pw2_140 487 13.2 0.455 0.455 5.193 5.193 dbcsr_make_images_dense 3978 14.8 0.017 0.017 5.157 5.157 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.096 5.096 apply_single 119 13.6 0.000 0.000 5.096 5.096 multiply_cannon 2286 13.5 0.184 0.184 5.083 5.083 cp_fm_cholesky_decompose 22 10.9 4.733 4.733 4.733 4.733 multiply_cannon_loop 2286 14.5 0.060 0.060 4.607 4.607 multiply_cannon_multrec 2286 15.5 4.496 4.496 4.546 4.546 cp_fm_cholesky_invert 11 10.9 4.120 4.120 4.120 4.120 init_scf_run 11 5.9 0.002 0.002 3.738 3.738 scf_env_initial_rho_setup 11 6.9 0.001 0.001 3.736 3.736 dbcsr_copy 2102 12.0 0.235 0.235 3.719 3.719 dbcsr_complete_redistribute 329 12.2 1.923 1.923 3.655 3.655 density_rs2pw 119 9.7 0.005 0.005 3.596 3.596 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.578 3.578 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.493 3.493 dbcsr_copy_into_existing 22 7.9 3.439 3.439 3.439 3.439 qs_ot_get_p 119 10.4 0.001 0.001 3.304 3.304 wfi_extrapolate 11 7.9 0.001 0.001 3.249 3.249 qs_create_task_list 11 7.9 0.000 0.000 3.210 3.210 generate_qs_task_list 11 8.9 1.957 1.957 3.210 3.210 fft3d_s 1202 14.6 3.051 3.051 3.056 3.056 copy_dbcsr_to_fm 153 11.3 0.002 0.002 2.938 2.938 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.597 2.597 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.589 2.589 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.277 2.277 dbcsr_data_release 279532 16.0 2.179 2.179 2.179 2.179 pw_poisson_solve 119 10.3 0.354 0.354 2.041 2.041 qs_ot_p2m_diag 50 11.0 0.154 0.154 2.006 2.006 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.943 1.943 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 1.898 1.898 copy_fm_to_dbcsr 176 11.2 0.001 0.001 1.886 1.886 potential_pw2rs 119 12.3 0.046 0.046 1.883 1.883 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.027 47.411 47.422 qs_mol_dyn_low 1 2.0 0.003 0.004 47.265 47.269 qs_forces 11 3.9 0.002 0.002 47.223 47.223 qs_energies 11 4.9 0.001 0.001 44.172 44.175 scf_env_do_scf 11 5.9 0.000 0.002 40.564 40.567 scf_env_do_scf_inner_loop 108 6.5 0.003 0.022 37.353 37.354 velocity_verlet 10 3.0 0.001 0.003 28.028 28.029 rebuild_ks_matrix 119 8.3 0.001 0.001 17.043 17.103 qs_ks_build_kohn_sham_matrix 119 9.3 0.016 0.017 17.042 17.102 qs_ks_update_qs_env 119 7.6 0.001 0.001 15.133 15.191 dbcsr_multiply_generic 2286 12.5 0.078 0.082 13.894 14.026 qs_rho_update_rho_low 119 7.7 0.001 0.001 13.791 13.804 calculate_rho_elec 119 8.7 0.030 0.032 13.791 13.804 sum_up_and_integrate 119 10.3 0.034 0.038 12.615 12.644 integrate_v_rspace 119 11.3 0.004 0.007 12.581 12.611 qs_scf_new_mos 108 7.5 0.001 0.001 11.239 11.342 qs_scf_loop_do_ot 108 8.5 0.001 0.001 11.238 11.341 ot_scf_mini 108 9.5 0.002 0.002 10.574 10.672 multiply_cannon 2286 13.5 0.149 0.164 10.102 10.318 multiply_cannon_loop 2286 14.5 0.097 0.114 9.502 9.777 grid_collocate_task_list 119 9.7 9.042 9.610 9.042 9.610 grid_integrate_task_list 119 12.3 8.178 8.368 8.178 8.368 mp_waitall_1 158411 16.6 7.697 8.313 7.697 8.313 multiply_cannon_metrocomm3 18288 15.5 0.042 0.048 5.844 6.272 ot_mini 108 10.5 0.001 0.001 6.090 6.200 rs_pw_transfer 974 11.9 0.012 0.015 4.708 5.151 density_rs2pw 119 9.7 0.005 0.006 4.075 4.492 pw_transfer 1439 11.6 0.079 0.087 4.081 4.196 fft_wrap_pw1pw2 1201 12.6 0.008 0.009 3.931 4.032 potential_pw2rs 119 12.3 0.007 0.008 3.598 3.624 multiply_cannon_multrec 18288 15.5 3.244 3.522 3.255 3.534 fft_wrap_pw1pw2_140 487 13.2 0.611 0.783 3.305 3.504 init_scf_loop 11 6.9 0.000 0.000 3.193 3.193 qs_ot_get_derivative 108 11.5 0.001 0.001 3.052 3.153 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.957 3.039 apply_single 119 13.6 0.000 0.001 2.957 3.039 ot_diis_step 108 11.5 0.004 0.004 3.015 3.015 fft3d_ps 1201 14.6 1.279 1.472 2.578 2.755 make_m2s 4572 13.5 0.046 0.049 2.538 2.621 init_scf_run 11 5.9 0.000 0.005 2.544 2.544 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.544 2.544 mp_waitany 9880 13.7 1.838 2.387 1.838 2.387 wfi_extrapolate 11 7.9 0.001 0.001 2.306 2.306 make_images 4572 14.5 0.118 0.121 2.207 2.286 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.046 2.056 rs_pw_transfer_RS2PW_140 130 11.5 0.173 0.203 1.292 1.740 qs_ot_get_p 119 10.4 0.001 0.001 1.542 1.696 rs_pw_transfer_PW2RS_140 130 13.9 0.357 0.465 1.333 1.386 make_images_data 4572 15.5 0.034 0.040 1.211 1.379 mp_alltoall_z22v 1201 16.6 1.047 1.364 1.047 1.364 mp_alltoall_d11v 2130 13.8 1.049 1.357 1.049 1.357 prepare_preconditioner 11 7.9 0.000 0.000 1.293 1.320 make_preconditioner 11 8.9 0.000 0.000 1.293 1.320 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.173 1.196 hybrid_alltoall_any 4725 16.4 0.061 0.181 1.022 1.186 mp_sum_l 11298 13.2 0.791 1.144 0.791 1.144 mp_sum_d 4135 12.0 0.703 1.113 0.703 1.113 rs_gather_matrices 119 12.3 0.097 0.111 0.742 1.035 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.942 1.028 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.982 1.011 rs_pw_transfer_PW2RS_50 119 14.3 0.319 0.377 0.895 0.985 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.891 0.968 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.955 0.957 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=49.38399999999999, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=14.899, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.613, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.249, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.733, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.496, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=17.412000000000003, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.042, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.178, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.244, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.838, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.697, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.034 113.934 113.934 qs_mol_dyn_low 1 2.0 0.003 0.003 113.231 113.231 qs_forces 11 3.9 0.001 0.001 113.192 113.192 qs_energies 11 4.9 0.001 0.001 105.294 105.294 scf_env_do_scf 11 5.9 0.001 0.001 92.047 92.047 velocity_verlet 10 3.0 0.002 0.002 73.082 73.082 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 73.032 73.032 rebuild_ks_matrix 107 8.3 0.001 0.001 34.036 34.036 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 34.036 34.036 qs_rho_update_rho_low 107 7.7 0.001 0.001 31.776 31.776 calculate_rho_elec 107 8.7 0.855 0.855 31.775 31.775 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.541 30.541 grid_collocate_task_list 107 9.7 27.534 27.534 27.534 27.534 sum_up_and_integrate 107 10.3 0.732 0.732 27.186 27.186 integrate_v_rspace 107 11.3 0.083 0.083 26.453 26.453 grid_integrate_task_list 107 12.3 24.600 24.600 24.600 24.600 init_scf_loop 11 6.9 0.000 0.000 18.848 18.848 dbcsr_multiply_generic 1966 12.4 0.143 0.143 18.123 18.123 qs_scf_new_mos 96 7.5 0.001 0.001 17.616 17.616 qs_scf_loop_do_ot 96 8.5 0.001 0.001 17.616 17.616 ot_scf_mini 96 9.5 0.002 0.002 16.385 16.385 prepare_preconditioner 11 7.9 0.000 0.000 14.677 14.677 make_preconditioner 11 8.9 0.000 0.000 14.677 14.677 make_full_inverse_cholesky 11 9.9 0.025 0.025 13.505 13.505 ot_mini 96 10.5 0.001 0.001 10.555 10.555 make_m2s 3932 13.4 0.041 0.041 9.961 9.961 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.169 7.169 pw_transfer 1295 11.6 0.059 0.059 5.859 5.859 fft_wrap_pw1pw2 1081 12.6 0.006 0.006 5.668 5.668 qs_ot_get_derivative 96 11.5 0.001 0.001 5.558 5.558 init_scf_run 11 5.9 0.002 0.002 5.436 5.436 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.434 5.434 dbcsr_make_dense_low 4961 15.5 0.060 0.060 5.299 5.299 make_dense_data 4961 16.5 4.711 4.711 5.226 5.226 make_images 3932 14.4 1.960 1.960 4.998 4.998 ot_diis_step 96 11.5 0.003 0.003 4.994 4.994 cp_fm_cholesky_decompose 22 10.9 4.972 4.972 4.972 4.972 fft_wrap_pw1pw2_140 439 13.2 0.561 0.561 4.945 4.945 wfi_extrapolate 11 7.9 0.001 0.001 4.753 4.753 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.702 4.702 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.680 4.680 apply_single 107 13.6 0.000 0.000 4.679 4.679 multiply_cannon 1966 13.4 0.160 0.160 4.671 4.671 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.364 4.364 multiply_cannon_loop 1966 14.4 0.037 0.037 4.252 4.252 cp_fm_cholesky_invert 11 10.9 4.232 4.232 4.232 4.232 multiply_cannon_multrec 1966 15.4 4.169 4.169 4.214 4.214 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.107 4.107 dbcsr_copy 1855 11.9 0.216 0.216 3.759 3.759 qs_create_task_list 11 7.9 0.000 0.000 3.718 3.718 generate_qs_task_list 11 8.9 2.444 2.444 3.717 3.717 dbcsr_complete_redistribute 317 12.2 1.915 1.915 3.709 3.709 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.532 3.532 dbcsr_copy_into_existing 22 7.9 3.509 3.509 3.509 3.509 density_rs2pw 107 9.7 0.004 0.004 3.387 3.387 copy_dbcsr_to_fm 147 11.2 0.002 0.002 3.148 3.148 fft3d_s 1082 14.6 2.780 2.780 2.785 2.785 qs_ot_get_p 107 10.4 0.001 0.001 2.656 2.656 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.642 2.642 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.490 2.490 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.027 73.426 73.436 qs_mol_dyn_low 1 2.0 0.003 0.003 73.281 73.285 qs_forces 11 3.9 0.002 0.002 73.238 73.239 qs_energies 11 4.9 0.001 0.001 68.352 68.355 scf_env_do_scf 11 5.9 0.000 0.002 63.197 63.198 scf_env_do_scf_inner_loop 96 6.5 0.003 0.020 58.424 58.426 velocity_verlet 10 3.0 0.001 0.003 43.618 43.619 rebuild_ks_matrix 107 8.3 0.000 0.001 31.441 31.517 qs_ks_build_kohn_sham_matrix 107 9.3 0.015 0.017 31.440 31.516 qs_ks_update_qs_env 107 7.6 0.001 0.001 27.675 27.743 sum_up_and_integrate 107 10.3 0.031 0.037 27.447 27.490 integrate_v_rspace 107 11.3 0.004 0.005 27.415 27.461 qs_rho_update_rho_low 107 7.7 0.001 0.001 26.427 26.438 calculate_rho_elec 107 8.7 0.027 0.029 26.427 26.438 grid_integrate_task_list 107 12.3 22.928 23.293 22.928 23.293 grid_collocate_task_list 107 9.7 21.988 22.364 21.988 22.364 dbcsr_multiply_generic 1966 12.4 0.070 0.074 12.661 12.817 qs_scf_new_mos 96 7.5 0.001 0.001 10.162 10.332 qs_scf_loop_do_ot 96 8.5 0.001 0.001 10.162 10.332 ot_scf_mini 96 9.5 0.002 0.002 9.592 9.781 multiply_cannon 1966 13.4 0.132 0.146 9.227 9.628 multiply_cannon_loop 1966 14.4 0.089 0.105 8.689 9.046 mp_waitall_1 136719 16.5 7.079 7.877 7.079 7.877 multiply_cannon_metrocomm3 15728 15.4 0.037 0.044 5.345 5.931 ot_mini 96 10.5 0.001 0.001 5.563 5.755 rs_pw_transfer 878 11.9 0.011 0.014 4.545 5.201 init_scf_loop 11 6.9 0.000 0.000 4.756 4.768 density_rs2pw 107 9.7 0.005 0.006 3.829 4.464 init_scf_run 11 5.9 0.000 0.004 4.060 4.060 scf_env_initial_rho_setup 11 6.9 0.000 0.004 4.059 4.060 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.889 3.900 pw_transfer 1295 11.6 0.071 0.080 3.665 3.772 wfi_extrapolate 11 7.9 0.001 0.001 3.700 3.700 fft_wrap_pw1pw2 1081 12.6 0.008 0.009 3.531 3.625 potential_pw2rs 107 12.3 0.006 0.008 3.344 3.380 multiply_cannon_multrec 15728 15.4 2.951 3.343 2.962 3.355 fft_wrap_pw1pw2_140 439 13.2 0.557 0.713 2.956 3.168 qs_ot_get_derivative 96 11.5 0.001 0.001 2.798 2.980 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.726 2.833 apply_single 107 13.6 0.000 0.001 2.726 2.833 ot_diis_step 96 11.5 0.003 0.003 2.741 2.741 mp_waitany 8968 13.7 1.860 2.714 1.860 2.714 fft3d_ps 1081 14.6 1.137 1.323 2.290 2.497 make_m2s 3932 13.4 0.041 0.043 2.271 2.361 make_images 3932 14.4 0.105 0.109 1.976 2.061 mp_alltoall_d11v 1998 13.7 1.373 2.058 1.373 2.058 rs_pw_transfer_RS2PW_140 118 11.5 0.180 0.221 1.339 1.998 rs_gather_matrices 107 12.3 0.088 0.101 1.082 1.776 qs_ot_get_p 107 10.4 0.001 0.001 1.424 1.657 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=43.715999999999994, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=27.534, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.6, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.972, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.711, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=4.232, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.169, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=16.620000000000005, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.988, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.928, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.951, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.079, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.86, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.205 0.205 107.253 107.253 qs_energies 1 2.0 0.000 0.000 106.380 106.380 scf_env_do_scf 1 3.0 0.000 0.000 105.192 105.192 qs_ks_update_qs_env 8 5.0 0.000 0.000 100.031 100.031 rebuild_ks_matrix 7 6.0 0.000 0.000 99.975 99.975 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 99.975 99.975 hfx_ks_matrix 7 8.0 0.000 0.000 90.717 90.717 integrate_four_center 7 9.0 1.669 1.669 90.664 90.664 integrate_four_center_main 7 10.0 0.647 0.647 82.526 82.526 integrate_four_center_bin 449 11.0 81.879 81.879 81.879 81.879 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 57.184 57.184 init_scf_loop 1 4.0 0.000 0.000 47.998 47.998 integrate_four_center_load 7 10.0 0.000 0.000 6.219 6.219 hfx_load_balance 1 11.0 0.001 0.001 6.219 6.219 qs_vxc_create 14 8.0 0.000 0.000 3.220 3.220 xc_vxc_pw_create 14 9.0 0.125 0.125 3.220 3.220 hfx_load_balance_bin 1 12.0 3.102 3.102 3.102 3.102 hfx_load_balance_count 1 12.0 3.100 3.100 3.100 3.100 prepare_preconditioner 1 5.0 0.000 0.000 2.520 2.520 make_preconditioner 1 6.0 0.000 0.000 2.520 2.520 calculate_rho_elec 15 7.4 0.118 0.118 2.465 2.465 xc_rho_set_and_dset_create 14 10.0 0.099 0.099 2.430 2.430 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 2.381 2.381 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.225 0.253 97.733 97.744 qs_energies 1 2.0 0.000 0.000 97.215 97.223 scf_env_do_scf 1 3.0 0.000 0.000 96.849 96.849 qs_ks_update_qs_env 8 5.0 0.000 0.000 94.559 94.559 rebuild_ks_matrix 7 6.0 0.000 0.000 94.551 94.551 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 94.551 94.551 hfx_ks_matrix 7 8.0 0.000 0.000 88.775 88.785 integrate_four_center 7 9.0 0.054 0.335 88.766 88.767 integrate_four_center_main 7 10.0 0.003 0.004 80.559 81.471 integrate_four_center_bin 448 11.0 80.556 81.468 80.556 81.468 scf_env_do_scf_inner_loop 7 4.0 0.000 0.002 53.600 53.600 init_scf_loop 1 4.0 0.000 0.000 43.248 43.248 integrate_four_center_load 7 10.0 0.000 0.000 5.667 5.668 hfx_load_balance 1 11.0 0.001 0.001 5.667 5.667 mp_sync 56 11.2 1.872 2.920 1.872 2.920 hfx_load_balance_count 1 12.0 2.794 2.838 2.794 2.838 hfx_load_balance_bin 1 12.0 2.781 2.832 2.781 2.832 qs_vxc_create 14 8.0 0.000 0.000 2.482 2.482 xc_vxc_pw_create 14 9.0 0.007 0.008 2.482 2.482 xc_rho_set_and_dset_create 14 10.0 0.009 0.011 1.949 2.062 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=16.650999999999996, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=81.879, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.102, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.1, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.669, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.647, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.205, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.448000000000008, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=80.556, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.781, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.794, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.054, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.003, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.225, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=1.872, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 77.329 77.329 qs_energies 1 2.0 0.000 0.000 76.906 76.906 mp2_main 1 3.0 0.000 0.000 74.024 74.024 mp2_gpw_main 1 4.0 0.000 0.000 73.901 73.901 rpa_ri_compute_en 1 5.0 0.000 0.000 70.185 70.185 rpa_num_int 1 6.0 0.001 0.001 70.179 70.179 compute_mat_P_omega 1 7.0 0.003 0.003 60.204 60.204 compute_mat_P_omega_contract 10 8.0 9.103 9.103 59.995 59.995 dbt_total 2336 9.6 0.015 0.015 45.806 45.806 dbt_contract 787 11.0 0.043 0.043 38.840 38.840 dbt_tas_total 1149 12.2 0.234 0.234 37.677 37.677 dbt_tas_multiply 807 12.1 0.002 0.002 36.420 36.420 dbt_tas_dbm 807 14.1 0.004 0.004 30.186 30.186 dbm_multiply 807 16.1 30.177 30.177 30.177 30.177 dbt_tas_mm_1N 524 15.1 0.001 0.001 22.824 22.824 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 21.501 21.501 compute_mat_P_omega_calc_M_occ 250 9.0 9.106 9.106 17.031 17.031 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 6.725 6.725 dbt_tas_mm_2 251 15.0 0.002 0.002 5.834 5.834 dbt_copy 1103 10.7 0.064 0.064 5.690 5.690 compute_QP_energies 1 7.0 0.000 0.000 5.005 5.005 compute_self_energy_cubic_gw 1 8.0 0.053 0.053 5.004 5.004 contract_cubic_gw 21 9.0 0.000 0.000 3.979 3.979 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.708 3.708 dbt_tas_reserve_blocks_index 3261 14.3 0.485 0.485 3.682 3.682 dbm_reserve_blocks 3628 15.3 3.262 3.262 3.262 3.262 dbt_reserve_blocks_index 2280 13.1 0.056 0.056 2.815 2.815 dbt_reserve_blocks_index_array 2222 12.2 0.009 0.009 2.769 2.769 scf_env_do_scf 1 3.0 0.000 0.000 2.763 2.763 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.762 2.762 dbt_crop 1042 12.0 1.425 1.425 2.292 2.292 convert_to_new_pgrid 2421 14.1 0.058 0.058 2.243 2.243 dbt_tas_copy 574 11.4 1.303 1.303 2.210 2.210 dbm_copy 1614 15.1 2.184 2.184 2.184 2.184 compute_W_cubic_GW 10 7.0 0.017 0.017 2.063 2.063 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.061 2.061 dbt_reshape 278 11.9 1.061 1.061 1.957 1.957 dbt_tas_reshape 367 15.0 0.007 0.007 1.903 1.903 get_2c_integrals 1 6.0 0.000 0.000 1.763 1.763 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.586 1.586 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.027 35.362 35.373 qs_energies 1 2.0 0.000 0.000 35.233 35.235 mp2_main 1 3.0 0.000 0.000 34.092 34.093 mp2_gpw_main 1 4.0 0.000 0.001 34.058 34.060 rpa_ri_compute_en 1 5.0 0.000 0.000 32.801 32.802 rpa_num_int 1 6.0 0.000 0.002 32.800 32.802 dbt_total 2336 9.6 0.016 0.019 28.882 28.899 compute_mat_P_omega 1 7.0 0.001 0.005 27.958 27.971 compute_mat_P_omega_contract 10 8.0 0.392 0.417 27.771 27.784 dbt_contract 787 11.0 0.040 0.042 21.505 21.516 dbt_tas_total 1149 12.2 0.078 0.088 19.144 19.158 dbt_tas_multiply 807 12.1 0.002 0.002 19.040 19.044 dbt_tas_dbm 807 14.1 0.003 0.004 13.433 13.443 dbm_multiply 807 16.1 10.550 11.399 10.550 11.399 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.476 8.477 compute_mat_P_omega_calc_M_occ 250 9.0 0.375 0.396 7.992 7.994 mp_sync 8688 11.6 5.802 7.157 5.802 7.157 dbt_copy 1149 10.8 0.013 0.014 6.180 6.565 dbt_tas_mm_2 251 15.0 0.001 0.001 6.295 6.300 dbt_reshape 1136 11.8 2.622 2.869 5.843 6.213 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.549 5.559 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.710 5.450 compute_QP_energies 1 7.0 0.000 0.000 3.047 3.048 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 3.046 3.047 mp_waitall_2 3812 15.3 2.403 2.657 2.403 2.657 dbt_communicate_buffer 1136 12.8 0.051 0.056 2.379 2.592 contract_cubic_gw 21 9.0 0.000 0.000 2.385 2.385 dbt_reserve_blocks_index 2887 13.1 0.070 0.076 1.704 1.986 dbt_reserve_blocks_index_array 2829 12.2 0.008 0.009 1.692 1.975 dbt_tas_reserve_blocks_index 3347 14.5 0.439 0.493 1.682 1.962 dbt_crop 1042 12.0 0.869 0.974 1.403 1.627 dbm_reserve_blocks 3752 15.4 1.331 1.594 1.331 1.594 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 1.255 1.256 dbt_tas_replicate 405 14.1 0.539 0.709 1.115 1.182 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.110 1.114 parallel_gemm_fm 105 8.4 0.000 0.000 1.089 1.097 parallel_gemm_fm_cosma 105 9.4 1.089 1.097 1.089 1.097 scf_env_do_scf 1 3.0 0.000 0.000 1.096 1.096 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 1.096 1.096 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.051 1.061 mp_sum_l 6165 12.9 0.918 1.041 0.918 1.041 mp_max_i 2002 9.8 0.711 0.958 0.711 0.958 convert_to_new_pgrid 2421 14.1 0.025 0.030 0.754 0.951 dbm_copy 1608 15.1 0.724 0.921 0.724 0.921 compute_W_cubic_GW 10 7.0 0.001 0.001 0.828 0.838 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=22.435999999999993, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=30.177, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=9.106, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=9.103, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.262, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.184, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.061, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=11.163000000000004, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.55, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.375, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.392, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.331, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.724, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.622, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.802, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.403, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 224.110 224.110 qs_forces 1 2.0 0.000 0.000 223.524 223.524 rebuild_ks_matrix 7 6.6 0.000 0.000 222.113 222.113 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 222.113 222.113 hfx_ks_matrix 7 8.6 0.000 0.000 220.277 220.277 hfx_ri_update_ks 7 9.6 0.000 0.000 180.133 180.133 hfx_ri_update_ks_Pmat 7 10.6 31.698 31.698 180.129 180.129 dbt_total 809 11.0 0.006 0.006 165.418 165.418 qs_energies 1 3.0 0.000 0.000 159.761 159.761 scf_env_do_scf 1 4.0 0.000 0.000 159.455 159.455 qs_ks_update_qs_env 8 6.0 0.000 0.000 158.406 158.406 dbt_contract 207 12.4 0.088 0.088 146.595 146.595 dbt_tas_total 343 13.7 1.667 1.667 144.521 144.521 dbt_tas_multiply 216 13.5 0.001 0.001 141.366 141.366 dbt_tas_dbm 216 15.5 0.001 0.001 129.420 129.420 dbm_multiply 216 17.5 129.417 129.417 129.417 129.417 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 124.138 124.138 dbt_tas_mm_2 91 16.5 0.001 0.001 115.599 115.599 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 99.238 99.238 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 63.710 63.710 init_scf_loop 2 5.0 0.000 0.000 60.215 60.215 hfx_ri_update_forces 1 7.0 1.657 1.657 40.142 40.142 hfx_ri_forces_Pmat_3c 1 8.0 4.783 4.783 22.665 22.665 dbt_copy 409 11.7 0.040 0.040 15.660 15.660 precalc_derivatives 1 8.0 2.271 2.271 13.444 13.444 dbt_reshape 132 13.2 6.860 6.860 11.012 11.012 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 9.548 9.548 dbt_tas_mm_3T 77 17.1 0.000 0.000 9.392 9.392 dbt_tas_reserve_blocks_index 1255 15.4 1.017 1.017 7.666 7.666 build_3c_derivatives 3 9.0 2.458 2.458 7.490 7.490 dbm_reserve_blocks 1397 16.3 6.836 6.836 6.836 6.836 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 6.709 6.709 dbt_reserve_blocks_index 818 14.4 0.096 0.096 5.978 5.978 dbt_reserve_blocks_index_array 795 13.4 0.007 0.007 5.863 5.863 dbt_crop 372 13.7 2.886 2.886 4.833 4.833 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 4.754 4.754 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.023 51.731 51.741 qs_forces 1 2.0 0.000 0.000 51.309 51.309 rebuild_ks_matrix 7 6.6 0.000 0.000 50.290 50.291 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 50.290 50.291 hfx_ks_matrix 7 8.6 0.000 0.000 49.256 49.264 dbt_total 809 11.0 0.006 0.006 44.121 44.150 dbt_contract 207 12.4 0.025 0.026 34.160 34.182 dbt_tas_total 343 13.7 0.105 0.305 30.092 30.105 dbt_tas_multiply 216 13.5 0.001 0.001 29.675 29.678 hfx_ri_update_ks 7 9.6 0.000 0.000 29.508 29.508 hfx_ri_update_ks_Pmat 7 10.6 1.214 1.276 29.504 29.506 qs_energies 1 3.0 0.000 0.000 27.830 27.831 scf_env_do_scf 1 4.0 0.000 0.001 27.678 27.679 qs_ks_update_qs_env 8 6.0 0.000 0.000 26.822 26.823 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 23.469 23.469 dbt_tas_dbm 216 15.5 0.001 0.001 22.254 22.267 dbm_multiply 216 17.5 19.892 20.754 19.892 20.754 hfx_ri_update_forces 1 7.0 0.056 0.060 19.747 19.755 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 15.776 15.776 hfx_ri_forces_Pmat_3c 1 8.0 0.156 0.168 14.754 14.754 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 13.785 13.794 init_scf_loop 2 5.0 0.000 0.000 11.902 11.902 dbt_tas_mm_2 91 16.5 0.001 0.001 10.509 10.522 dbt_copy 497 12.3 0.011 0.013 8.583 8.969 mp_sync 2769 12.9 5.558 7.063 5.558 7.063 dbt_reshape 365 13.6 3.551 3.633 6.697 6.966 dbt_tas_mm_3T 77 17.1 0.000 0.000 5.448 6.021 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 5.378 5.379 dbt_tas_mm_3N 37 15.4 0.000 0.000 4.356 4.553 precalc_derivatives 1 8.0 0.079 0.084 3.692 3.692 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.633 3.633 dbt_tas_reserve_blocks_index 1380 15.8 0.897 0.929 3.057 3.424 mp_waitall_2 1234 16.4 2.912 3.109 2.912 3.109 dbt_reserve_blocks_index 1051 14.7 0.111 0.118 2.538 2.829 dbt_reserve_blocks_index_array 1028 13.8 0.005 0.006 2.511 2.799 dbm_reserve_blocks 1529 16.7 2.350 2.711 2.350 2.711 dbt_crop 372 13.7 1.750 1.810 2.520 2.618 dbt_communicate_buffer 365 14.6 0.012 0.013 2.071 2.229 mp_sum_l 6385 13.7 1.952 2.180 1.952 2.180 build_3c_derivatives 3 9.0 0.226 0.246 2.054 2.060 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.961 1.964 dbt_tas_replicate 149 15.4 0.634 0.690 1.646 1.757 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.750 1.750 convert_to_new_pgrid 648 15.5 0.036 0.080 1.362 1.674 dbm_copy 452 16.3 1.199 1.502 1.199 1.502 dbt_tas_copy 132 12.5 0.601 0.647 1.177 1.307 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=44.51599999999999, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=129.417, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=31.698, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=6.86, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=6.836, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.783, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=16.098000000000006, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=19.892, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.214, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.551, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.35, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.156, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.912, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=5.558, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 171.417 171.417 qs_energies 1 2.0 0.000 0.000 171.224 171.224 mp2_main 1 3.0 0.000 0.000 166.363 166.363 mp2_gpw_main 1 4.0 0.001 0.001 165.897 165.897 mp2_ri_gpw_compute_in 1 5.0 0.394 0.394 119.607 119.607 mp2_ri_gpw_compute_in_loop 1 6.0 0.011 0.011 110.625 110.625 mp2_eri_3c_integrate_gpw 2656 7.0 0.015 0.015 79.294 79.294 integrate_v_rspace 2666 8.0 0.642 0.642 65.990 65.990 grid_integrate_task_list 2666 9.0 63.312 63.312 63.312 63.312 mp2_ri_gpw_compute_en 1 5.0 0.090 0.090 46.264 46.264 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.583 9.583 44.379 44.379 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.134 2.134 27.434 27.434 local_gemm 2080 8.0 25.300 25.300 25.300 25.300 dbcsr_multiply_generic 5322 8.0 0.185 0.185 21.656 21.656 ao_to_mo_and_store_B_mult_1 2656 7.0 0.011 0.011 21.636 21.636 pw_transfer 63872 10.6 1.063 1.063 12.174 12.174 calculate_wavefunction 2656 8.0 8.059 8.059 11.826 11.826 multiply_cannon 5322 9.0 0.464 0.464 11.164 11.164 fft_wrap_pw1pw2 53228 11.4 0.113 0.113 10.864 10.864 multiply_cannon_loop 5322 10.0 0.129 0.129 9.756 9.756 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.757 2.757 9.577 9.577 get_2c_integrals 1 6.0 0.000 0.000 8.586 8.586 make_m2s 10644 9.0 0.063 0.063 8.316 8.316 multiply_cannon_multrec 5322 11.0 8.227 8.227 8.270 8.270 make_images 10644 10.0 3.218 3.218 8.008 8.008 fft_wrap_pw1pw2_20 21271 12.4 0.502 0.502 7.765 7.765 compute_2c_integrals 1 7.0 0.006 0.006 7.752 7.752 compute_2c_integrals_loop_lm 1 8.0 0.011 0.011 7.738 7.738 mp2_eri_2c_integrate_gpw 1 9.0 0.855 0.855 7.727 7.727 copy_dbcsr_to_fm 2679 8.0 0.029 0.029 7.440 7.440 fft3d_s 53229 13.4 6.640 6.640 6.680 6.680 dbcsr_complete_redistribute 2689 9.0 1.180 1.180 5.739 5.739 mp2_ri_gpw_compute_en_ener 2080 7.0 5.531 5.531 5.531 5.531 dbcsr_finalize 10708 9.5 0.167 0.167 5.081 5.081 scf_env_do_scf 1 3.0 0.000 0.000 4.456 4.456 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.456 4.456 dbcsr_merge_all 8011 10.3 3.375 3.375 4.423 4.423 potential_pw2rs 5322 10.0 0.153 0.153 3.997 3.997 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.028 40.065 40.075 qs_energies 1 2.0 0.000 0.000 39.763 39.764 mp2_main 1 3.0 0.000 0.001 37.638 37.639 mp2_gpw_main 1 4.0 0.001 0.002 37.541 37.542 mp2_ri_gpw_compute_en 1 5.0 0.290 0.299 19.980 20.190 mp2_ri_gpw_compute_en_RI_loop 1 6.0 3.409 3.966 18.631 18.638 mp2_ri_gpw_compute_in 1 5.0 0.053 0.053 17.472 17.845 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 16.142 16.519 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 14.066 14.243 integrate_v_rspace 93 8.1 0.103 0.113 13.974 14.152 grid_integrate_task_list 93 9.1 13.631 13.799 13.631 13.799 mp2_ri_gpw_compute_en_expansio 65 7.0 0.144 0.182 11.120 11.381 local_gemm 65 8.0 10.976 11.201 10.976 11.201 mp2_ri_gpw_compute_en_comm 30 7.0 0.099 0.156 3.708 4.663 mp_sendrecv_dm3 1860 8.0 3.002 4.137 3.002 4.137 scf_env_do_scf 1 3.0 0.000 0.000 1.991 1.992 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 1.991 1.992 dbcsr_multiply_generic 176 8.0 0.008 0.009 1.758 1.934 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.742 1.917 get_2c_integrals 1 6.0 0.004 0.013 1.244 1.274 multiply_cannon 176 9.0 0.017 0.018 1.058 1.141 multiply_cannon_loop 176 10.0 0.002 0.002 1.001 1.080 qs_scf_new_mos 10 5.0 0.000 0.000 0.944 0.957 compute_2c_integrals 1 7.0 0.002 0.004 0.918 0.937 multiply_cannon_multrec 246 11.0 0.872 0.922 0.877 0.927 fill_local_i_aL 1920 8.0 0.653 0.914 0.653 0.914 eigensolver 11 5.8 0.001 0.001 0.891 0.893 compute_2c_integrals_loop_lm 1 8.0 0.002 0.004 0.762 0.845 mp2_eri_2c_integrate_gpw 1 9.0 0.203 0.248 0.761 0.843 pw_transfer 2120 10.5 0.044 0.049 0.779 0.816 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=56.93600000000001, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=63.312, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=25.3, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.583, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.227, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=8.059, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=8.175, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.631, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=10.976, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=3.409, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.872, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=3.002, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.091 0.091 126.977 126.977 qs_energies 1 2.0 0.000 0.000 125.472 125.472 scf_env_do_scf 1 3.0 0.000 0.000 118.344 118.344 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 118.344 118.344 qs_ks_update_qs_env 15 5.0 0.000 0.000 50.395 50.395 rebuild_ks_matrix 15 6.0 0.000 0.000 50.176 50.176 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 50.176 50.176 qs_scf_new_mos 15 5.0 0.000 0.000 44.051 44.051 eigensolver 15 6.0 0.002 0.002 36.139 36.139 qs_vxc_create 15 8.0 0.038 0.038 34.359 34.359 calculate_dispersion_nonloc 15 9.0 6.895 6.895 29.997 29.997 pw_transfer 1191 10.0 0.069 0.069 23.604 23.604 fft_wrap_pw1pw2 1086 11.0 0.009 0.009 23.403 23.403 cp_fm_diag_elpa 15 7.0 0.000 0.000 23.055 23.055 cp_fm_diag_elpa_base 15 8.0 20.469 20.469 23.055 23.055 qs_rho_update_rho_low 16 5.0 0.000 0.000 20.503 20.503 calculate_rho_elec 16 6.0 0.218 0.218 20.503 20.503 grid_collocate_task_list 16 7.0 19.089 19.089 19.089 19.089 fft_wrap_pw1pw2_150 765 12.0 3.656 3.656 17.191 17.191 sum_up_and_integrate 15 8.0 0.157 0.157 14.649 14.649 integrate_v_rspace 15 9.0 0.019 0.019 14.492 14.492 grid_integrate_task_list 15 10.0 13.960 13.960 13.960 13.960 cp_fm_cholesky_restore 45 7.0 10.706 10.706 10.706 10.706 fft3d_s 1087 13.0 10.447 10.447 10.454 10.454 pw_scatter_s 585 13.1 7.299 7.299 7.299 7.299 fft_wrap_pw1pw2_200 197 12.3 0.756 0.756 6.020 6.020 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.730 5.730 dbcsr_complete_redistribute 46 8.3 2.278 2.278 5.616 5.616 cp_fm_upper_to_full 30 8.0 4.962 4.962 4.962 4.962 vdW_energy 15 10.0 4.506 4.506 4.506 4.506 gspace_mixing 14 5.0 0.171 0.171 4.443 4.443 xc_vxc_pw_create 15 9.0 0.230 0.230 4.324 4.324 broyden_mixing 14 6.0 3.824 3.824 3.824 3.824 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.638 3.638 init_scf_run 1 3.0 0.000 0.000 3.026 3.026 xc_pw_derive 90 11.0 0.001 0.001 2.800 2.800 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.029 61.762 61.773 qs_energies 1 2.0 0.000 0.000 61.446 61.452 scf_env_do_scf 1 3.0 0.000 0.001 57.236 57.236 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 57.236 57.236 qs_ks_update_qs_env 15 5.0 0.000 0.000 25.267 25.279 rebuild_ks_matrix 15 6.0 0.000 0.000 25.232 25.244 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 25.232 25.244 qs_rho_update_rho_low 16 5.0 0.000 0.000 19.021 19.027 calculate_rho_elec 16 6.0 0.007 0.010 19.021 19.027 grid_collocate_task_list 16 7.0 17.764 17.970 17.764 17.970 sum_up_and_integrate 15 8.0 0.015 0.023 14.499 14.550 integrate_v_rspace 15 9.0 0.001 0.001 14.484 14.538 grid_integrate_task_list 15 10.0 13.468 13.649 13.468 13.649 qs_scf_new_mos 15 5.0 0.000 0.000 13.435 13.487 eigensolver 15 6.0 0.001 0.002 12.422 12.451 qs_vxc_create 15 8.0 0.001 0.001 10.384 10.402 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.609 8.615 cp_fm_diag_elpa_base 15 8.0 8.467 8.490 8.605 8.607 pw_transfer 1191 10.0 0.080 0.091 8.259 8.326 calculate_dispersion_nonloc 15 9.0 0.912 0.926 8.262 8.291 fft_wrap_pw1pw2 1086 11.0 0.011 0.013 8.096 8.192 fft3d_ps 1086 13.0 2.645 3.034 5.540 6.117 fft_wrap_pw1pw2_150 765 12.0 0.619 0.802 5.201 5.230 cp_fm_cholesky_restore 45 7.0 3.665 3.728 3.665 3.728 mp_alltoall_z22v 1086 15.0 2.354 3.461 2.354 3.461 fft_wrap_pw1pw2_200 197 12.3 0.454 0.572 2.769 2.815 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.692 2.692 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.379 2.588 xc_vxc_pw_create 15 9.0 0.016 0.022 2.121 2.166 x_to_yz 585 14.1 0.336 0.368 1.526 1.989 yz_to_x 501 13.9 0.181 0.201 1.345 1.935 build_core_ppnl 1 5.0 1.477 1.605 1.477 1.605 xc_pw_derive 90 11.0 0.001 0.001 1.466 1.557 vdW_energy 15 10.0 1.299 1.380 1.299 1.380 rs_pw_transfer 158 9.4 0.002 0.002 0.997 1.329 init_scf_run 1 3.0 0.000 0.000 1.295 1.296 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=52.306, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=20.469, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.089, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.96, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.706, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.447, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=15.753, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.467, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=17.764, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.468, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.665, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.645, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.088 0.088 278.194 278.194 qs_energies 1 2.0 0.000 0.000 278.042 278.042 ls_scf 1 3.0 0.000 0.000 276.746 276.746 ls_scf_main 1 4.0 0.002 0.002 267.334 267.334 density_matrix_trs4 11 5.0 0.013 0.013 164.649 164.649 ls_scf_dm_to_ks 11 5.0 0.000 0.000 96.307 96.307 matrix_ls_to_qs 11 6.0 0.000 0.000 92.605 92.605 dbcsr_multiply_generic 185 6.1 0.971 0.971 81.398 81.398 arnoldi_extremal 12 6.1 0.000 0.000 67.145 67.145 arnoldi_normal_ev 12 7.1 0.047 0.047 67.144 67.144 build_subspace 23 8.1 0.088 0.088 65.859 65.859 dbcsr_matrix_vector_mult 652 9.0 0.263 0.263 65.196 65.196 dbcsr_matrix_vector_mult_local 652 10.0 63.620 63.620 63.630 63.630 dbcsr_complete_redistribute 23 7.5 34.486 34.486 48.545 48.545 dbcsr_copy_into_existing 11 7.0 48.482 48.482 48.482 48.482 multiply_cannon 185 7.1 0.376 0.376 46.683 46.683 matrix_decluster 11 7.0 0.000 0.000 44.122 44.122 multiply_cannon_loop 185 8.1 0.258 0.258 33.781 33.781 make_m2s 370 7.1 0.040 0.040 29.382 29.382 make_images 370 8.1 12.899 12.899 26.919 26.919 multiply_cannon_multrec 185 9.1 24.085 24.085 24.245 24.245 dbcsr_finalize 646 7.5 0.229 0.229 18.320 18.320 dbcsr_merge_all 597 8.5 2.725 2.725 17.026 17.026 tree_to_linear_d 110 9.4 12.767 12.767 12.767 12.767 setup_rec_index_2d 370 8.1 12.357 12.357 12.357 12.357 dbcsr_sort_indices 1103 9.9 11.877 11.877 11.877 11.877 quick_finalize 395 10.0 0.454 0.454 10.263 10.263 dbcsr_special_finalize 370 9.1 0.002 0.002 9.472 9.472 calculate_norms 370 9.1 9.277 9.277 9.277 9.277 ls_scf_init_scf 1 4.0 0.000 0.000 8.693 8.693 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.352 8.352 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.683 7.683 dbcsr_dot_sd 144 6.3 7.351 7.351 7.352 7.352 matrix_qs_to_ls 12 5.1 0.000 0.000 6.545 6.545 matrix_cluster 12 6.1 0.000 0.000 6.545 6.545 dbcsr_frobenius_norm 142 6.1 5.717 5.717 5.719 5.719 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.021 74.862 74.873 qs_energies 1 2.0 0.000 0.000 74.634 74.634 ls_scf 1 3.0 0.000 0.000 74.570 74.571 ls_scf_main 1 4.0 0.001 0.009 71.715 71.715 density_matrix_trs4 11 5.0 0.007 0.022 69.028 69.123 dbcsr_multiply_generic 185 6.1 0.061 0.068 65.526 65.819 multiply_cannon 185 7.1 0.034 0.038 54.019 54.750 multiply_cannon_loop 185 8.1 0.116 0.136 51.460 52.134 multiply_cannon_multrec 1480 9.1 30.865 34.318 31.129 34.598 mp_waitall_1 11936 10.3 18.362 22.863 18.362 22.863 multiply_cannon_metrocomm3 1480 9.1 0.015 0.017 13.781 18.790 make_m2s 370 7.1 0.035 0.037 7.594 7.667 make_images 370 8.1 0.635 0.669 7.462 7.529 calculate_norms 2960 9.1 5.073 5.685 5.073 5.685 mp_sum_l 1199 5.3 3.053 3.960 3.053 3.960 make_images_data 370 9.1 0.009 0.012 3.560 3.855 hybrid_alltoall_any 393 9.9 0.252 1.842 3.048 3.334 multiply_cannon_metrocomm1 1480 9.1 0.006 0.008 1.323 3.138 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 2.092 2.885 arnoldi_extremal 12 6.1 0.000 0.000 2.650 2.668 arnoldi_normal_ev 12 7.1 0.001 0.008 2.650 2.668 build_subspace 23 8.1 0.020 0.026 2.535 2.537 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.331 2.437 dbcsr_matrix_vector_mult 652 9.0 0.014 0.050 2.112 2.199 dbcsr_complete_redistribute 23 7.5 1.232 1.313 2.080 2.196 ls_scf_init_scf 1 4.0 0.000 0.000 2.183 2.184 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.152 2.162 matrix_ls_to_qs 11 6.0 0.000 0.000 2.019 2.135 matrix_decluster 11 7.0 0.000 0.004 1.874 1.991 make_images_pack 370 9.1 1.751 1.973 1.754 1.976 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.960 1.962 dbcsr_matrix_vector_mult_local 652 10.0 1.680 1.742 1.682 1.744 buffer_matrices_ensure_size 370 8.1 1.448 1.728 1.448 1.728 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=85.34500000000003, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=63.62, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=48.482, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=34.486, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=24.085, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images", label="make_images", y=12.899, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.277, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=12.211000000000006, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.68, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.232, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=30.865, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images", label="make_images", y=0.635, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.073, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.053, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=18.362, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.751, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 74.726 74.726 lib_test 1 2.0 0.000 0.000 74.718 74.718 dbcsr_run_tests 3 3.0 0.002 0.002 74.718 74.718 test_multiplies_multiproc 3 4.0 0.001 0.001 58.430 58.430 dbcsr_redistribute 9 5.0 38.474 38.474 40.133 40.133 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.927 16.927 dbcsr_make_random_matrix 9 4.0 12.837 12.837 16.173 16.173 multiply_cannon 9 6.0 0.023 0.023 11.993 11.993 multiply_cannon_loop 9 7.0 0.032 0.032 11.615 11.615 multiply_cannon_multrec 9 8.0 11.582 11.582 11.583 11.583 dbcsr_finalize 27 5.7 0.021 0.021 5.999 5.999 dbcsr_merge_all 18 6.5 2.120 2.120 5.249 5.249 dbcsr_data_release 975 7.6 2.855 2.855 2.855 2.855 tree_to_linear_d 9 7.0 1.991 1.991 1.991 1.991 make_m2s 18 6.0 0.001 0.001 1.705 1.705 make_images 18 7.0 0.589 0.589 1.629 1.629 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.011 20.421 20.425 lib_test 1 2.0 0.000 0.000 20.285 20.309 dbcsr_run_tests 3 3.0 0.000 0.001 20.284 20.308 test_multiplies_multiproc 3 4.0 0.000 0.002 19.439 19.504 dbcsr_multiply_generic 9 5.0 0.001 0.001 17.738 17.845 multiply_cannon 9 6.0 0.002 0.002 15.249 15.685 multiply_cannon_loop 9 7.0 0.002 0.002 14.929 15.356 multiply_cannon_multrec 72 8.0 11.309 11.940 11.309 11.941 mp_waitall_1 576 9.2 3.980 4.775 3.980 4.775 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 3.491 4.388 mp_sum_l 470 2.5 0.976 1.369 0.976 1.369 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.847 1.240 dbcsr_data_release 444 7.6 0.779 0.883 0.779 0.883 dbcsr_make_random_matrix 9 4.0 0.661 0.672 0.807 0.843 make_m2s 18 6.0 0.001 0.001 0.776 0.831 make_images 18 7.0 0.021 0.023 0.772 0.828 dbcsr_destroy 111 5.9 0.002 0.037 0.677 0.768 dbcsr_finalize 27 5.7 0.000 0.001 0.605 0.709 dbcsr_redistribute 9 5.0 0.251 0.287 0.605 0.642 dbcsr_merge_all 18 6.5 0.095 0.118 0.500 0.578 make_images_data 18 8.0 0.001 0.004 0.406 0.472 hybrid_alltoall_any 18 9.0 0.038 0.185 0.336 0.425 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.858000000000004, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=38.474, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.837, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.582, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.855, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.12, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.370000000000001, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.251, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.661, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.309, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.779, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.095, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.98, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.976, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.078 0.078 138.792 138.792 qs_mol_dyn_low 1 2.0 0.003 0.003 137.243 137.243 velocity_verlet 5 3.0 0.004 0.004 113.060 113.060 qmmm_el_coupling 6 3.8 0.000 0.000 87.736 87.736 qmmm_elec_with_gaussian 6 4.8 0.029 0.029 87.732 87.732 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 86.798 86.798 qmmm_elec_gaussian_low_G 6 6.8 85.883 85.883 85.883 85.883 qs_forces 6 3.8 0.001 0.001 40.315 40.315 qs_energies 6 4.8 0.000 0.000 35.444 35.444 scf_env_do_scf 6 5.8 0.001 0.001 32.853 32.853 rebuild_ks_matrix 45 8.4 0.000 0.000 28.861 28.861 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 28.861 28.861 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 28.496 28.496 qs_ks_update_qs_env 45 7.8 0.000 0.000 24.497 24.497 pw_transfer 966 12.3 0.055 0.055 17.879 17.879 fft_wrap_pw1pw2 801 13.6 0.006 0.006 17.657 17.657 fft_wrap_pw1pw2_150 507 15.2 2.620 2.620 17.272 17.272 qs_vxc_create 45 10.4 0.001 0.001 14.629 14.629 xc_vxc_pw_create 45 11.4 0.748 0.748 14.628 14.628 xc_pw_derive 270 13.4 0.002 0.002 9.660 9.660 xc_rho_set_and_dset_create 45 12.4 1.109 1.109 7.970 7.970 fft3d_s 802 15.6 7.646 7.646 7.654 7.654 qs_rho_update_rho_low 45 7.9 0.000 0.000 7.403 7.403 calculate_rho_elec 45 8.9 0.632 0.632 7.403 7.403 pw_integral_ab 2539 7.4 6.849 6.849 6.849 6.849 pw_scatter_s 429 15.8 6.036 6.036 6.036 6.036 qmmm_forces 6 3.8 0.003 0.003 5.952 5.952 xc_pw_divergence 45 12.4 0.001 0.001 5.849 5.849 qmmm_forces_with_gaussian 6 4.8 0.045 0.045 5.530 5.530 qs_ks_ddapc 45 10.4 0.001 0.001 5.131 5.131 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 4.379 4.379 init_scf_loop 6 6.8 0.000 0.000 4.352 4.352 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.304 4.304 sum_up_and_integrate 45 10.4 0.884 0.884 4.112 4.112 qmmm_forces_gaussian_low_G 6 6.8 3.578 3.578 3.578 3.578 density_rs2pw 45 9.9 0.002 0.002 3.464 3.464 cp_ddapc_apply_CD 45 11.4 0.006 0.006 3.417 3.417 grid_collocate_task_list 45 9.9 3.306 3.306 3.306 3.306 pw_poisson_solve 51 9.9 0.692 0.692 3.249 3.249 integrate_v_rspace 45 11.4 0.007 0.007 3.228 3.228 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.043 0.062 61.053 61.068 qs_mol_dyn_low 1 2.0 0.003 0.003 59.737 59.800 qs_forces 6 3.8 0.001 0.001 44.761 44.762 qs_energies 6 4.8 0.000 0.000 42.729 42.729 scf_env_do_scf 6 5.8 0.000 0.001 41.586 41.586 scf_env_do_scf_inner_loop 113 6.2 0.003 0.020 39.845 39.846 rebuild_ks_matrix 119 8.1 0.000 0.000 29.610 29.619 qs_ks_build_kohn_sham_matrix 119 9.1 0.016 0.018 29.610 29.619 qs_ks_update_qs_env 119 7.3 0.001 0.001 27.878 27.887 velocity_verlet 5 3.0 0.002 0.004 24.552 24.555 pw_transfer 2446 12.3 0.172 0.195 21.226 21.684 fft_wrap_pw1pw2 2059 13.4 0.021 0.025 20.841 21.338 fft_wrap_pw1pw2_150 1321 14.9 3.196 3.946 19.998 20.536 qs_vxc_create 119 10.1 0.002 0.003 16.041 16.047 xc_vxc_pw_create 119 11.1 0.148 0.223 16.039 16.044 fft3d_ps 2059 15.4 7.197 8.346 13.955 15.588 xc_pw_derive 714 13.1 0.008 0.010 12.379 12.623 qs_rho_update_rho_low 119 7.3 0.001 0.001 11.511 11.512 calculate_rho_elec 119 8.3 0.049 0.056 11.510 11.511 sum_up_and_integrate 119 10.1 0.074 0.094 9.848 9.876 integrate_v_rspace 119 11.1 0.003 0.004 9.773 9.817 xc_pw_divergence 119 12.1 0.004 0.005 8.066 8.322 xc_rho_set_and_dset_create 119 12.1 0.332 0.419 7.550 7.809 qmmm_forces 6 3.8 0.002 0.003 7.703 7.704 mp_alltoall_z22v 2059 17.4 5.436 7.662 5.436 7.662 qmmm_forces_with_gaussian 6 4.8 0.006 0.007 7.180 7.512 rs_pw_transfer 988 11.5 0.012 0.015 6.838 7.201 density_rs2pw 119 9.3 0.006 0.007 6.873 7.144 qmmm_el_coupling 6 3.8 0.000 0.000 6.222 6.467 qmmm_elec_with_gaussian 6 4.8 0.003 0.004 6.220 6.466 potential_pw2rs 119 12.1 0.007 0.007 5.915 5.947 grid_collocate_task_list 119 9.3 4.377 4.810 4.377 4.810 x_to_yz 1095 16.8 0.774 0.869 3.567 4.606 yz_to_x 964 16.0 0.495 0.619 3.139 4.439 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.021 4.056 mp_waitany 4028 12.8 3.018 3.826 3.018 3.826 grid_integrate_task_list 119 12.1 3.529 3.747 3.529 3.747 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.443 3.476 qmmm_forces_gaussian_low_G 6 6.8 3.308 3.342 3.308 3.342 qmmm_elec_gaussian_low_G 6 6.8 2.850 2.883 2.850 2.883 pw_restrict_s3 18 5.8 1.373 1.456 2.552 2.873 rs_pw_transfer_PW2RS_150 125 13.9 0.751 0.952 2.607 2.676 qs_scf_new_mos 113 7.2 0.000 0.000 2.516 2.521 qs_scf_loop_do_ot 113 8.2 0.000 0.001 2.515 2.521 pw_gather_p 964 15.0 2.073 2.521 2.073 2.521 rs_pw_transfer_RS2PW_150 125 11.2 0.620 0.803 2.179 2.512 ot_scf_mini 113 9.2 0.001 0.001 2.426 2.431 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.110 2.363 pw_prolongate_s3 18 6.8 1.149 1.242 2.110 2.363 qs_ks_ddapc 119 10.1 0.002 0.003 2.176 2.330 dbcsr_multiply_generic 2588 12.3 0.058 0.059 2.015 2.066 mp_sum_d 5822 12.2 1.140 1.891 1.140 1.891 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.744 1.745 init_scf_loop 6 6.8 0.000 0.000 1.737 1.738 pw_integral_ab 2761 7.7 1.104 1.324 1.545 1.687 pw_scatter_p 1095 15.8 1.550 1.608 1.550 1.608 ot_mini 113 10.2 0.000 0.001 1.280 1.286 mp_sum_dm3 33 5.7 1.167 1.237 1.167 1.237 mp_waitall_1 177795 16.4 1.003 1.235 1.003 1.235 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=25.494, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=85.883, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.646, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=6.849, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=6.036, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.578, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.306, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=33.251999999999995, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.85, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.104, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.308, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.377, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.197, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.529, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.436, yerr=0.0 Summary: Performance test took 33 minutes. Status: OK Removing intermediate container 7f49a6d4705d ---> c24cb6c642cd Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 5c7a84363958 Removing intermediate container 5c7a84363958 ---> c0c905b7a3f5 Step 42/42 : ENTRYPOINT [] ---> Running in e3c7ca380970 Removing intermediate container e3c7ca380970 ---> a9adbe69baf5 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built a9adbe69baf5 Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2023-06-24 21:34:13+00:00