StartDate: 2022-11-28 19:06:00+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: a007b1b0cc940e469a6f710d029fae2a5232a07b CommitTime: 2022-11-28 16:17:07 +0100 CommitAuthor: Matthias Krack CommitSubject: Update test for aiida-cp2k (v1.5.0 -> v2.0.0b0) Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=a007b1b0cc940e469a6f710d029fae2a5232a07b Sending build context to Docker daemon 365.2MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu e96e057aae67: Already exists Digest: sha256:4b1d0c4a2d2aaf63b37111f34eb9fa89fa1bf53dd6e4ca954d47caebca4005c2 Status: Downloaded newer image for ubuntu:22.04 ---> a8780b506fa4 Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 456d88508063 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 196acfd778fb Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 1c5721a94525 Step 5/42 : RUN mkdir scripts ---> Using cache ---> aa1c7bdf0e90 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 8641bd388b4f Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 13fd585790c3 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> cac01e5ed808 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 4d68dfb96b0f Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> d498842004b1 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 653a132d5aa7 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 932f838464d3 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 1801374ed881 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> a8995b19ba48 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> e525eee1ff8e Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> f4f361a003dd Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> d9846d8fb855 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> d624138fd825 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 0852cfeb086b Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 6c47a36ef72e Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 330701cd5327 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 11a46bf60703 Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> d0e7361681ed Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> e10558b1118d Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> b8012d08a940 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> a166563e6660 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 03495f2a4096 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 39d9f60a4a0d Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> b366bde029cd Step 30/42 : COPY ./Makefile . ---> Using cache ---> c76bf2add1a2 Step 31/42 : COPY ./src ./src ---> Using cache ---> 1a62315dd55b Step 32/42 : COPY ./exts ./exts ---> Using cache ---> c72a4a4d8868 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> a4724cc42833 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in f6ff0549d83a './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container f6ff0549d83a ---> c5c027771313 Step 35/42 : COPY ./data ./data ---> 5dfc3c2b3afd Step 36/42 : COPY ./tests ./tests ---> 67440516f32e Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> fbd69a5d9cf4 Step 38/42 : COPY ./benchmarks ./benchmarks ---> 4d11f10710e2 Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> cc9eb77571ff Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 49dba035c53d ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.034 90.197 90.197 qs_mol_dyn_low 1 2.0 0.003 0.003 89.569 89.569 qs_forces 11 3.9 0.001 0.001 89.529 89.529 qs_energies 11 4.9 0.001 0.001 83.558 83.558 scf_env_do_scf 11 5.9 0.001 0.001 72.701 72.701 velocity_verlet 10 3.0 0.002 0.002 58.211 58.211 scf_env_do_scf_inner_loop 108 6.5 0.012 0.012 55.926 55.926 rebuild_ks_matrix 119 8.3 0.001 0.001 21.001 21.001 qs_ks_build_kohn_sham_matrix 119 9.3 0.012 0.012 21.001 21.001 dbcsr_multiply_generic 2286 12.5 0.156 0.156 20.411 20.411 qs_rho_update_rho_low 119 7.7 0.000 0.000 20.325 20.325 calculate_rho_elec 119 8.7 0.963 0.963 20.324 20.324 qs_scf_new_mos 108 7.5 0.001 0.001 20.247 20.247 qs_scf_loop_do_ot 108 8.5 0.001 0.001 20.246 20.246 qs_ks_update_qs_env 119 7.6 0.001 0.001 19.304 19.304 ot_scf_mini 108 9.5 0.002 0.002 18.802 18.802 init_scf_loop 11 6.9 0.000 0.000 16.594 16.594 grid_collocate_task_list 119 9.7 16.128 16.128 16.128 16.128 prepare_preconditioner 11 7.9 0.000 0.000 14.052 14.052 make_preconditioner 11 8.9 0.000 0.000 14.052 14.052 sum_up_and_integrate 119 10.3 0.534 0.534 13.492 13.492 integrate_v_rspace 119 11.3 0.092 0.092 12.958 12.958 make_full_inverse_cholesky 11 9.9 0.000 0.000 12.914 12.914 ot_mini 108 10.5 0.001 0.001 12.154 12.154 make_m2s 4572 13.5 0.045 0.045 11.042 11.042 grid_integrate_task_list 119 12.3 11.001 11.001 11.001 11.001 qs_ot_get_derivative 108 11.5 0.001 0.001 6.307 6.307 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.214 6.214 dbcsr_make_dense_low 5837 15.5 0.065 0.065 5.931 5.931 make_dense_data 5837 16.5 5.162 5.162 5.852 5.852 ot_diis_step 108 11.5 0.004 0.004 5.843 5.843 pw_transfer 1439 11.6 0.066 0.066 5.788 5.788 multiply_cannon 2286 13.5 0.176 0.176 5.580 5.580 make_images 4572 14.5 2.176 2.176 5.559 5.559 fft_wrap_pw1pw2 1201 12.6 0.006 0.006 5.553 5.553 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.351 5.351 apply_single 119 13.6 0.000 0.000 5.351 5.351 dbcsr_make_images_dense 3978 14.8 0.018 0.018 5.180 5.180 multiply_cannon_loop 2286 14.5 0.054 0.054 5.112 5.112 multiply_cannon_multrec 2286 15.5 5.004 5.004 5.057 5.057 fft_wrap_pw1pw2_140 487 13.2 0.426 0.426 4.715 4.715 cp_fm_cholesky_decompose 22 10.9 4.428 4.428 4.428 4.428 cp_fm_cholesky_invert 11 10.9 4.293 4.293 4.293 4.293 init_scf_run 11 5.9 0.002 0.002 4.028 4.028 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.026 4.026 dbcsr_complete_redistribute 329 12.2 1.945 1.945 3.711 3.711 wfi_extrapolate 11 7.9 0.001 0.001 3.494 3.494 dbcsr_copy 2102 12.0 0.249 0.249 3.470 3.470 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.329 3.329 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.298 3.298 density_rs2pw 119 9.7 0.005 0.005 3.233 3.233 dbcsr_copy_into_existing 22 7.9 3.189 3.189 3.190 3.190 qs_ot_get_p 119 10.4 0.001 0.001 3.118 3.118 copy_dbcsr_to_fm 153 11.3 0.002 0.002 3.079 3.079 qs_create_task_list 11 7.9 0.000 0.000 2.919 2.919 generate_qs_task_list 11 8.9 1.969 1.969 2.919 2.919 fft3d_s 1202 14.6 2.859 2.859 2.864 2.864 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.640 2.640 build_core_hamiltonian_matrix 11 6.9 0.000 0.000 2.505 2.505 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.384 2.384 dbcsr_data_release 279534 16.0 2.030 2.030 2.030 2.030 pw_poisson_solve 119 10.3 0.353 0.353 2.017 2.017 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.978 1.978 copy_fm_to_dbcsr 176 11.2 0.001 0.001 1.916 1.916 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 1.893 1.893 potential_pw2rs 119 12.3 0.047 0.047 1.865 1.865 qs_ot_p2m_diag 50 11.0 0.154 0.154 1.827 1.827 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.023 47.424 47.434 qs_mol_dyn_low 1 2.0 0.004 0.005 47.307 47.311 qs_forces 11 3.9 0.001 0.001 47.263 47.264 qs_energies 11 4.9 0.001 0.013 44.157 44.158 scf_env_do_scf 11 5.9 0.000 0.002 40.425 40.426 scf_env_do_scf_inner_loop 108 6.5 0.003 0.024 37.296 37.296 velocity_verlet 10 3.0 0.002 0.004 28.227 28.228 rebuild_ks_matrix 119 8.3 0.001 0.001 17.235 17.324 qs_ks_build_kohn_sham_matrix 119 9.3 0.016 0.019 17.234 17.324 qs_ks_update_qs_env 119 7.6 0.001 0.001 15.321 15.406 qs_rho_update_rho_low 119 7.7 0.001 0.001 14.047 14.060 calculate_rho_elec 119 8.7 0.030 0.032 14.047 14.060 dbcsr_multiply_generic 2286 12.5 0.078 0.085 13.726 13.889 sum_up_and_integrate 119 10.3 0.038 0.042 12.870 12.883 integrate_v_rspace 119 11.3 0.004 0.005 12.833 12.846 qs_scf_new_mos 108 7.5 0.001 0.001 10.833 10.936 qs_scf_loop_do_ot 108 8.5 0.001 0.001 10.832 10.936 multiply_cannon 2286 13.5 0.158 0.170 10.090 10.452 ot_scf_mini 108 9.5 0.002 0.003 10.161 10.255 grid_collocate_task_list 119 9.7 9.838 10.122 9.838 10.122 multiply_cannon_loop 2286 14.5 0.099 0.105 9.506 9.882 grid_integrate_task_list 119 12.3 8.810 9.097 8.810 9.097 mp_waitall_1 169478 16.3 8.271 8.752 8.271 8.752 ot_mini 108 10.5 0.001 0.001 5.886 5.996 multiply_cannon_metrocomm3 18288 15.5 0.038 0.040 5.367 5.946 rs_pw_transfer 974 11.9 0.012 0.014 4.573 4.971 density_rs2pw 119 9.7 0.006 0.007 3.823 4.209 pw_transfer 1439 11.6 0.106 0.114 3.664 3.729 fft_wrap_pw1pw2 1201 12.6 0.010 0.012 3.478 3.540 potential_pw2rs 119 12.3 0.008 0.008 3.467 3.482 multiply_cannon_multrec 18288 15.5 3.295 3.452 3.305 3.462 init_scf_loop 11 6.9 0.000 0.000 3.114 3.115 qs_ot_get_derivative 108 11.5 0.001 0.001 3.014 3.108 fft_wrap_pw1pw2_140 487 13.2 0.317 0.345 2.954 3.045 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 2.776 2.883 apply_single 119 13.6 0.000 0.000 2.776 2.882 ot_diis_step 108 11.5 0.004 0.004 2.851 2.851 init_scf_run 11 5.9 0.000 0.005 2.655 2.655 scf_env_initial_rho_setup 11 6.9 0.000 0.004 2.654 2.655 fft3d_ps 1201 14.6 1.338 1.455 2.510 2.573 make_m2s 4572 13.5 0.048 0.055 2.473 2.534 wfi_extrapolate 11 7.9 0.001 0.001 2.345 2.345 make_images 4572 14.5 0.123 0.128 2.126 2.192 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.061 2.072 mp_waitany 9880 13.7 1.387 1.824 1.387 1.824 qs_ot_get_p 119 10.4 0.001 0.001 1.325 1.481 rs_pw_transfer_RS2PW_140 130 11.5 0.226 0.252 1.073 1.475 rs_pw_transfer_PW2RS_140 130 13.9 0.426 0.467 1.326 1.356 make_images_data 4572 15.5 0.038 0.045 1.155 1.252 prepare_preconditioner 11 7.9 0.000 0.000 1.165 1.188 make_preconditioner 11 8.9 0.000 0.000 1.165 1.188 hybrid_alltoall_any 4725 16.4 0.070 0.218 1.033 1.128 multiply_cannon_metrocomm1 18288 15.5 0.019 0.020 0.528 1.121 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.047 1.070 mp_sum_l 11218 13.2 0.692 1.060 0.692 1.060 mp_alltoall_d11v 2130 13.8 0.775 1.040 0.775 1.040 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 0.935 1.019 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.959 1.011 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.948 0.995 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.974 0.976 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=48.47400000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=16.128, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=11.001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.162, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.004, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.428, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=15.823, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.838, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.81, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.295, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.271, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.387, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.028 0.028 112.664 112.664 qs_mol_dyn_low 1 2.0 0.003 0.003 111.995 111.995 qs_forces 11 3.9 0.001 0.001 111.956 111.956 qs_energies 11 4.9 0.001 0.001 104.157 104.157 scf_env_do_scf 11 5.9 0.001 0.001 91.596 91.596 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 73.470 73.470 velocity_verlet 10 3.0 0.002 0.002 71.910 71.910 rebuild_ks_matrix 107 8.3 0.001 0.001 33.958 33.958 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 33.957 33.957 qs_rho_update_rho_low 107 7.7 0.000 0.000 32.579 32.579 calculate_rho_elec 107 8.7 0.859 0.859 32.578 32.578 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.403 30.403 grid_collocate_task_list 107 9.7 28.434 28.434 28.434 28.434 sum_up_and_integrate 107 10.3 0.455 0.455 27.182 27.182 integrate_v_rspace 107 11.3 0.086 0.086 26.727 26.727 grid_integrate_task_list 107 12.3 24.923 24.923 24.923 24.923 dbcsr_multiply_generic 1966 12.4 0.133 0.133 17.938 17.938 init_scf_loop 11 6.9 0.000 0.000 17.888 17.888 qs_scf_new_mos 96 7.5 0.000 0.000 17.319 17.319 qs_scf_loop_do_ot 96 8.5 0.001 0.001 17.319 17.319 ot_scf_mini 96 9.5 0.002 0.002 16.098 16.098 prepare_preconditioner 11 7.9 0.000 0.000 13.823 13.823 make_preconditioner 11 8.9 0.000 0.000 13.823 13.823 make_full_inverse_cholesky 11 9.9 0.000 0.000 12.676 12.676 ot_mini 96 10.5 0.001 0.001 10.393 10.393 make_m2s 3932 13.4 0.039 0.039 9.707 9.707 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.505 6.505 pw_transfer 1295 11.6 0.057 0.057 5.643 5.643 qs_ot_get_derivative 96 11.5 0.001 0.001 5.482 5.482 fft_wrap_pw1pw2 1081 12.6 0.006 0.006 5.462 5.462 init_scf_run 11 5.9 0.002 0.002 5.387 5.387 scf_env_initial_rho_setup 11 6.9 0.000 0.000 5.385 5.385 dbcsr_make_dense_low 4961 15.5 0.067 0.067 5.106 5.106 make_dense_data 4961 16.5 4.500 4.500 5.027 5.027 make_images 3932 14.4 1.938 1.938 4.951 4.951 ot_diis_step 96 11.5 0.003 0.003 4.908 4.908 multiply_cannon 1966 13.4 0.157 0.157 4.907 4.907 wfi_extrapolate 11 7.9 0.001 0.001 4.772 4.772 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.631 4.631 apply_single 107 13.6 0.000 0.000 4.631 4.631 fft_wrap_pw1pw2_140 439 13.2 0.501 0.501 4.571 4.571 multiply_cannon_loop 1966 14.4 0.046 0.046 4.492 4.492 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.465 4.465 multiply_cannon_multrec 1966 15.4 4.398 4.398 4.446 4.446 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.426 4.426 cp_fm_cholesky_decompose 22 10.9 4.388 4.388 4.388 4.388 cp_fm_cholesky_invert 11 10.9 4.067 4.067 4.067 4.067 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.705 3.705 dbcsr_complete_redistribute 317 12.2 1.907 1.907 3.671 3.671 dbcsr_copy 1855 11.9 0.222 0.222 3.404 3.404 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.000 3.371 3.371 qs_create_task_list 11 7.9 0.000 0.000 3.330 3.330 generate_qs_task_list 11 8.9 2.414 2.414 3.330 3.330 density_rs2pw 107 9.7 0.004 0.004 3.286 3.286 copy_dbcsr_to_fm 147 11.2 0.002 0.002 3.167 3.167 dbcsr_copy_into_existing 22 7.9 3.144 3.144 3.144 3.144 fft3d_s 1082 14.6 2.811 2.811 2.816 2.816 qs_ot_get_p 107 10.4 0.001 0.001 2.620 2.620 build_core_hamiltonian_matrix 11 6.9 0.000 0.000 2.397 2.397 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.354 2.354 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.022 75.000 75.011 qs_mol_dyn_low 1 2.0 0.003 0.004 74.865 74.869 qs_forces 11 3.9 0.001 0.001 74.821 74.822 qs_energies 11 4.9 0.001 0.001 69.856 69.857 scf_env_do_scf 11 5.9 0.000 0.002 64.642 64.643 scf_env_do_scf_inner_loop 96 6.5 0.003 0.022 59.842 59.843 velocity_verlet 10 3.0 0.002 0.004 44.284 44.285 rebuild_ks_matrix 107 8.3 0.000 0.001 32.338 32.426 qs_ks_build_kohn_sham_matrix 107 9.3 0.014 0.016 32.338 32.426 qs_ks_update_qs_env 107 7.6 0.001 0.001 28.532 28.612 sum_up_and_integrate 107 10.3 0.038 0.042 28.249 28.264 integrate_v_rspace 107 11.3 0.004 0.007 28.211 28.226 qs_rho_update_rho_low 107 7.7 0.000 0.001 27.422 27.429 calculate_rho_elec 107 8.7 0.027 0.028 27.421 27.429 grid_integrate_task_list 107 12.3 24.174 24.554 24.174 24.554 grid_collocate_task_list 107 9.7 23.560 23.946 23.560 23.946 dbcsr_multiply_generic 1966 12.4 0.069 0.075 12.922 13.122 qs_scf_new_mos 96 7.5 0.001 0.001 9.988 10.077 qs_scf_loop_do_ot 96 8.5 0.001 0.001 9.988 10.076 multiply_cannon 1966 13.4 0.136 0.156 9.652 10.071 multiply_cannon_loop 1966 14.4 0.092 0.101 9.154 9.564 ot_scf_mini 96 9.5 0.002 0.002 9.380 9.449 mp_waitall_1 146670 16.2 7.949 8.423 7.949 8.423 multiply_cannon_metrocomm3 15728 15.4 0.035 0.038 5.299 5.808 ot_mini 96 10.5 0.001 0.001 5.500 5.581 rs_pw_transfer 878 11.9 0.011 0.015 4.384 5.046 init_scf_loop 11 6.9 0.000 0.000 4.785 4.785 density_rs2pw 107 9.7 0.005 0.006 3.531 4.192 init_scf_run 11 5.9 0.000 0.004 4.134 4.134 scf_env_initial_rho_setup 11 6.9 0.000 0.004 4.133 4.134 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.942 3.954 wfi_extrapolate 11 7.9 0.001 0.001 3.754 3.755 pw_transfer 1295 11.6 0.098 0.111 3.286 3.325 potential_pw2rs 107 12.3 0.007 0.012 3.252 3.266 multiply_cannon_multrec 15728 15.4 3.065 3.223 3.074 3.232 fft_wrap_pw1pw2 1081 12.6 0.009 0.010 3.117 3.159 qs_ot_get_derivative 96 11.5 0.001 0.001 2.810 2.880 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.659 2.766 fft_wrap_pw1pw2_140 439 13.2 0.283 0.302 2.694 2.766 apply_single 107 13.6 0.000 0.000 2.659 2.766 ot_diis_step 96 11.5 0.003 0.003 2.669 2.670 make_m2s 3932 13.4 0.042 0.048 2.258 2.307 fft3d_ps 1081 14.6 1.213 1.282 2.239 2.303 mp_waitany 8968 13.7 1.502 2.189 1.502 2.189 make_images 3932 14.4 0.109 0.114 1.949 1.999 rs_pw_transfer_RS2PW_140 118 11.5 0.178 0.203 1.133 1.797 mp_alltoall_d11v 1998 13.7 0.977 1.552 0.977 1.552 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=46.021, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=28.434, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.923, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.5, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.398, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.388, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=14.750000000000007, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.56, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.174, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.065, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.502, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.949, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.183 0.183 108.400 108.400 qs_energies 1 2.0 0.000 0.000 107.564 107.564 scf_env_do_scf 1 3.0 0.000 0.000 106.394 106.394 qs_ks_update_qs_env 8 5.0 0.000 0.000 101.451 101.451 rebuild_ks_matrix 7 6.0 0.000 0.000 101.394 101.394 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 101.394 101.394 hfx_ks_matrix 7 8.0 0.000 0.000 92.686 92.686 integrate_four_center 7 9.0 2.098 2.098 92.664 92.664 integrate_four_center_main 7 10.0 0.733 0.733 84.440 84.440 integrate_four_center_bin 446 11.0 83.707 83.707 83.707 83.707 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 58.335 58.335 init_scf_loop 1 4.0 0.000 0.000 48.046 48.046 integrate_four_center_load 7 10.0 0.001 0.001 5.869 5.869 hfx_load_balance 1 11.0 0.001 0.001 5.868 5.868 qs_vxc_create 14 8.0 0.000 0.000 3.069 3.069 xc_vxc_pw_create 14 9.0 0.116 0.116 3.069 3.069 hfx_load_balance_count 1 12.0 2.926 2.926 2.926 2.926 hfx_load_balance_bin 1 12.0 2.926 2.926 2.926 2.926 calculate_rho_elec 15 7.4 0.118 0.118 2.448 2.448 prepare_preconditioner 1 5.0 0.000 0.000 2.325 2.325 make_preconditioner 1 6.0 0.000 0.000 2.325 2.325 xc_rho_set_and_dset_create 14 10.0 0.093 0.093 2.307 2.307 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.227 0.252 101.032 101.043 qs_energies 1 2.0 0.000 0.000 100.684 100.685 scf_env_do_scf 1 3.0 0.000 0.000 100.323 100.323 qs_ks_update_qs_env 8 5.0 0.000 0.000 98.240 98.241 rebuild_ks_matrix 7 6.0 0.000 0.000 98.232 98.232 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 98.232 98.232 hfx_ks_matrix 7 8.0 0.000 0.004 92.838 92.842 integrate_four_center 7 9.0 0.056 0.357 92.829 92.830 integrate_four_center_main 7 10.0 0.003 0.003 82.411 85.766 integrate_four_center_bin 448 11.0 82.408 85.763 82.408 85.763 scf_env_do_scf_inner_loop 7 4.0 0.000 0.002 56.398 56.398 init_scf_loop 1 4.0 0.000 0.000 43.923 43.923 integrate_four_center_load 7 10.0 0.000 0.000 5.755 5.756 hfx_load_balance 1 11.0 0.001 0.001 5.755 5.756 mp_sync 70 11.3 3.953 5.298 3.953 5.298 hfx_load_balance_bin 1 12.0 2.817 2.876 2.817 2.876 hfx_load_balance_count 1 12.0 2.810 2.872 2.810 2.872 qs_vxc_create 14 8.0 0.000 0.000 2.390 2.390 xc_vxc_pw_create 14 9.0 0.007 0.008 2.390 2.390 xc_rho_set_and_dset_create 14 10.0 0.010 0.012 1.916 2.029 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=15.826999999999998, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=83.707, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.926, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.926, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.098, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.733, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.183, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=8.757999999999996, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=82.408, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.817, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.81, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.056, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.003, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.227, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=3.953, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 83.032 83.032 qs_energies 1 2.0 0.000 0.000 82.651 82.651 mp2_main 1 3.0 0.000 0.000 79.777 79.777 mp2_gpw_main 1 4.0 0.000 0.000 79.664 79.664 rpa_ri_compute_en 1 5.0 0.000 0.000 75.937 75.937 rpa_num_int 1 6.0 0.001 0.001 75.932 75.932 compute_mat_P_omega 1 7.0 0.003 0.003 65.722 65.722 compute_mat_P_omega_contract 10 8.0 9.020 9.020 65.521 65.521 dbt_total 2336 9.6 0.011 0.011 51.804 51.804 dbt_contract 787 11.0 0.034 0.034 44.569 44.569 dbt_tas_total 1149 12.2 0.200 0.200 43.587 43.587 dbt_tas_multiply 807 12.1 0.002 0.002 42.146 42.146 dbt_tas_dbm 807 14.1 0.003 0.003 35.635 35.635 dbm_multiply 807 16.1 35.626 35.626 35.626 35.626 dbt_tas_mm_1N 524 15.1 0.001 0.001 28.095 28.095 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 26.667 26.667 compute_mat_P_omega_calc_M_occ 250 9.0 9.037 9.037 17.172 17.172 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 6.815 6.815 dbt_tas_mm_2 251 15.0 0.001 0.001 5.949 5.949 dbt_copy 1103 10.7 0.082 0.082 5.777 5.777 compute_QP_energies 1 7.0 0.000 0.000 5.135 5.135 compute_self_energy_cubic_gw 1 8.0 0.054 0.054 5.135 5.135 contract_cubic_gw 21 9.0 0.000 0.000 4.101 4.101 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.720 3.720 dbt_tas_reserve_blocks_index 3261 14.3 0.146 0.146 3.599 3.599 dbm_reserve_blocks 3628 15.3 3.515 3.515 3.515 3.515 scf_env_do_scf 1 3.0 0.000 0.000 2.746 2.746 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.746 2.746 dbt_reserve_blocks_index 2280 13.1 0.060 0.060 2.735 2.735 dbt_reserve_blocks_index_array 2222 12.2 0.010 0.010 2.689 2.689 dbt_tas_copy 574 11.4 1.438 1.438 2.335 2.335 dbt_crop 1042 12.0 1.475 1.475 2.323 2.323 dbt_tas_reshape 367 15.0 0.006 0.006 2.181 2.181 convert_to_new_pgrid 2421 14.1 0.073 0.073 2.126 2.126 dbm_copy 1614 15.1 2.053 2.053 2.053 2.053 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.046 2.046 compute_W_cubic_GW 10 7.0 0.010 0.010 1.960 1.960 dbt_reshape 278 11.9 1.005 1.005 1.911 1.911 get_2c_integrals 1 6.0 0.000 0.000 1.783 1.783 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.022 33.632 33.643 qs_energies 1 2.0 0.000 0.001 33.541 33.544 mp2_main 1 3.0 0.000 0.000 32.567 32.570 mp2_gpw_main 1 4.0 0.000 0.000 32.529 32.531 rpa_ri_compute_en 1 5.0 0.000 0.000 31.200 31.203 rpa_num_int 1 6.0 0.000 0.002 31.200 31.202 dbt_total 2336 9.6 0.012 0.013 27.667 27.675 compute_mat_P_omega 1 7.0 0.001 0.005 26.616 26.665 compute_mat_P_omega_contract 10 8.0 0.449 0.463 26.491 26.494 dbt_contract 787 11.0 0.027 0.028 20.787 20.790 dbt_tas_total 1149 12.2 0.055 0.062 18.639 18.639 dbt_tas_multiply 807 12.1 0.002 0.002 18.578 18.581 dbt_tas_dbm 807 14.1 0.003 0.003 13.957 13.967 dbm_multiply 807 16.1 10.871 11.648 10.871 11.648 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.815 7.816 compute_mat_P_omega_calc_M_occ 250 9.0 0.426 0.440 7.795 7.796 dbt_tas_mm_2 251 15.0 0.001 0.002 6.454 6.463 dbt_copy 1111 10.7 0.012 0.013 6.117 6.254 dbt_reshape 1098 11.7 2.312 2.443 5.836 5.977 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.733 5.734 dbt_tas_mm_1N 524 15.1 0.001 0.002 5.137 5.712 mp_sync 8706 11.6 4.767 5.493 4.767 5.493 compute_QP_energies 1 7.0 0.000 0.000 2.880 2.881 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 2.879 2.880 mp_waitall_2 3776 15.3 2.691 2.877 2.691 2.877 dbt_communicate_buffer 1098 12.7 0.054 0.058 2.753 2.859 contract_cubic_gw 21 9.0 0.000 0.000 2.284 2.284 dbt_reserve_blocks_index 2849 13.1 0.071 0.076 1.821 1.980 dbt_reserve_blocks_index_array 2791 12.2 0.009 0.010 1.810 1.969 dbt_tas_reserve_blocks_index 3300 14.5 0.113 0.125 1.788 1.944 dbm_reserve_blocks 3696 15.4 1.777 1.936 1.777 1.936 dbt_crop 1042 12.0 0.958 1.069 1.514 1.651 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.326 1.327 dbt_tas_replicate 396 14.1 0.543 0.703 1.121 1.195 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.030 1.037 parallel_gemm_fm 105 8.4 0.000 0.000 1.011 1.022 parallel_gemm_fm_cosma 105 9.4 1.011 1.022 1.011 1.022 convert_to_new_pgrid 2421 14.1 0.025 0.028 0.901 0.971 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 0.950 0.952 dbm_copy 1608 15.1 0.869 0.940 0.869 0.940 scf_env_do_scf 1 3.0 0.000 0.000 0.935 0.935 scf_env_do_scf_inner_loop 17 4.0 0.000 0.002 0.935 0.935 compute_W_cubic_GW 10 7.0 0.001 0.001 0.762 0.767 mp_max_i 1994 9.8 0.569 0.726 0.569 0.726 dbm_add 807 14.1 0.670 0.715 0.670 0.715 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=22.776000000000003, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=35.626, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=9.037, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=9.02, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.515, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.053, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.005, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=9.469999999999999, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.871, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.426, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.449, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.777, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.869, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.312, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=4.767, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.691, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 357.969 357.969 qs_forces 1 2.0 0.000 0.000 357.411 357.411 rebuild_ks_matrix 7 6.6 0.000 0.000 355.876 355.876 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 355.876 355.876 hfx_ks_matrix 7 8.6 0.000 0.000 353.849 353.849 hfx_ri_update_ks 7 9.6 0.000 0.000 311.248 311.248 hfx_ri_update_ks_Pmat 7 10.6 31.685 31.685 311.243 311.243 dbt_total 841 11.0 0.005 0.005 298.603 298.603 dbt_contract 207 12.4 0.036 0.036 278.660 278.660 dbt_tas_total 375 13.4 1.569 1.569 278.162 278.162 dbt_tas_multiply 216 13.5 0.001 0.001 273.619 273.619 qs_energies 1 3.0 0.000 0.000 271.755 271.755 scf_env_do_scf 1 4.0 0.000 0.000 271.451 271.451 qs_ks_update_qs_env 8 6.0 0.000 0.000 270.271 270.271 dbt_tas_dbm 216 15.5 0.001 0.001 260.938 260.938 dbm_multiply 216 17.5 260.935 260.935 260.935 260.935 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 254.876 254.876 dbt_tas_mm_2 91 16.5 0.001 0.001 246.512 246.512 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 171.836 171.836 init_scf_loop 2 5.0 0.000 0.000 99.612 99.612 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 85.608 85.608 hfx_ri_update_forces 1 7.0 1.616 1.616 42.598 42.598 hfx_ri_forces_Pmat_3c 1 8.0 4.798 4.798 24.963 24.963 dbt_copy 409 11.7 0.045 0.045 15.391 15.391 precalc_derivatives 1 8.0 2.243 2.243 13.584 13.584 dbt_reshape 132 13.2 6.520 6.520 10.597 10.597 dbt_tas_mm_3T 77 17.1 0.000 0.000 10.043 10.043 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 9.793 9.793 build_3c_derivatives 3 9.0 3.260 3.260 7.637 7.637 dbt_tas_reserve_blocks_index 1287 15.4 0.286 0.286 7.178 7.178 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.023 46.602 46.610 qs_forces 1 2.0 0.000 0.000 46.462 46.462 rebuild_ks_matrix 7 6.6 0.000 0.000 45.772 45.773 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 45.772 45.773 hfx_ks_matrix 7 8.6 0.000 0.000 44.771 44.778 dbt_total 841 11.0 0.005 0.006 39.752 39.754 dbt_contract 207 12.4 0.022 0.024 31.339 31.362 dbt_tas_total 375 13.4 0.044 0.107 29.035 29.036 dbt_tas_multiply 216 13.5 0.001 0.001 28.085 28.085 hfx_ri_update_ks 7 9.6 0.000 0.000 26.273 26.274 hfx_ri_update_ks_Pmat 7 10.6 1.303 1.369 26.273 26.273 qs_energies 1 3.0 0.000 0.000 24.728 24.728 scf_env_do_scf 1 4.0 0.000 0.001 24.590 24.591 qs_ks_update_qs_env 8 6.0 0.000 0.000 24.049 24.049 dbt_tas_dbm 216 15.5 0.001 0.001 22.519 22.522 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 21.724 21.724 dbm_multiply 216 17.5 20.026 21.205 20.026 21.205 hfx_ri_update_forces 1 7.0 0.063 0.067 18.497 18.504 hfx_ri_forces_Pmat_3c 1 8.0 0.178 0.191 13.840 13.856 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 13.737 13.738 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 12.327 12.328 init_scf_loop 2 5.0 0.000 0.000 10.852 10.852 dbt_tas_mm_2 91 16.5 0.001 0.001 10.344 10.346 mp_sync 2909 12.8 4.561 7.375 4.561 7.375 dbt_copy 421 11.8 0.010 0.013 7.051 7.198 dbt_tas_mm_3T 77 17.1 0.000 0.000 6.261 6.686 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 5.295 5.295 dbt_reshape 252 12.8 2.427 2.517 4.807 4.914 dbt_tas_mm_3N 37 15.4 0.000 0.000 4.249 4.443 precalc_derivatives 1 8.0 0.081 0.086 3.536 3.536 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.325 3.325 dbm_reserve_blocks 1477 16.3 2.830 3.031 2.830 3.031 dbt_tas_reserve_blocks_index 1302 15.5 0.217 0.232 2.793 2.977 dbt_crop 372 13.7 1.822 1.914 2.508 2.658 mp_waitall_2 1204 16.3 2.346 2.448 2.346 2.448 dbt_reserve_blocks_index 938 14.4 0.102 0.108 2.201 2.350 dbt_reserve_blocks_index_array 915 13.4 0.005 0.005 2.176 2.322 build_3c_derivatives 3 9.0 0.229 0.246 1.998 2.006 dbt_tas_replicate 175 15.2 0.716 0.753 1.752 1.796 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.784 1.788 dbt_tas_copy 169 12.8 0.905 0.935 1.577 1.676 dbt_communicate_buffer 252 13.8 0.012 0.013 1.587 1.660 convert_to_new_pgrid 648 15.5 0.035 0.063 1.415 1.591 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.495 1.496 dbm_copy 452 16.3 1.232 1.394 1.232 1.394 dbt_tas_communicate_buffer 352 16.4 0.013 0.014 1.001 1.064 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=50.771000000000015, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=260.935, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=31.685, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=6.52, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.798, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="build_3c_derivatives", label="build_3c_derivatives", y=3.26, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=12.701999999999991, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=20.026, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.303, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.427, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.178, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="build_3c_derivatives", label="build_3c_derivatives", y=0.229, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=4.561, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.83, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.346, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 221.704 221.704 qs_energies 1 2.0 0.000 0.000 221.520 221.520 mp2_main 1 3.0 0.000 0.000 216.583 216.583 mp2_gpw_main 1 4.0 0.001 0.001 216.119 216.119 mp2_ri_gpw_compute_in 1 5.0 0.386 0.386 173.253 173.253 mp2_ri_gpw_compute_in_loop 1 6.0 0.009 0.009 161.926 161.926 mp2_eri_3c_integrate_gpw 2656 7.0 0.012 0.012 131.525 131.525 integrate_v_rspace 2666 8.0 0.600 0.600 118.451 118.451 grid_integrate_task_list 2666 9.0 115.905 115.905 115.905 115.905 mp2_ri_gpw_compute_en 1 5.0 0.086 0.086 42.843 42.843 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.905 9.905 40.992 40.992 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.042 2.042 23.735 23.735 local_gemm 2080 8.0 21.693 21.693 21.693 21.693 dbcsr_multiply_generic 5322 8.0 0.179 0.179 20.893 20.893 ao_to_mo_and_store_B_mult_1 2656 7.0 0.009 0.009 20.872 20.872 calculate_wavefunction 2656 8.0 8.118 8.118 11.752 11.752 pw_transfer 63872 10.6 1.053 1.053 11.484 11.484 get_2c_integrals 1 6.0 0.000 0.000 10.940 10.940 multiply_cannon 5322 9.0 0.425 0.425 10.748 10.748 fft_wrap_pw1pw2 53228 11.4 0.110 0.110 10.216 10.216 compute_2c_integrals 1 7.0 0.006 0.006 10.092 10.092 compute_2c_integrals_loop_lm 1 8.0 0.007 0.007 10.067 10.067 mp2_eri_2c_integrate_gpw 1 9.0 3.476 3.476 10.060 10.060 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.369 2.369 9.425 9.425 multiply_cannon_loop 5322 10.0 0.105 0.105 9.413 9.413 multiply_cannon_multrec 5322 11.0 7.956 7.956 7.995 7.995 make_m2s 10644 9.0 0.062 0.062 7.988 7.988 make_images 10644 10.0 3.285 3.285 7.691 7.691 copy_dbcsr_to_fm 2679 8.0 0.026 0.026 7.679 7.679 fft_wrap_pw1pw2_20 21271 12.4 0.487 0.487 7.331 7.331 fft3d_s 53229 13.4 6.266 6.266 6.299 6.299 dbcsr_complete_redistribute 2689 9.0 1.109 1.109 6.083 6.083 dbcsr_finalize 10708 9.5 0.192 0.192 5.512 5.512 mp2_ri_gpw_compute_en_ener 2080 7.0 5.379 5.379 5.379 5.379 dbcsr_merge_all 8011 10.3 3.716 3.716 4.862 4.862 scf_env_do_scf 1 3.0 0.000 0.000 4.547 4.547 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.546 4.546 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.021 37.994 38.004 qs_energies 1 2.0 0.000 0.001 37.919 37.920 mp2_main 1 3.0 0.000 0.001 35.761 35.762 mp2_gpw_main 1 4.0 0.001 0.002 35.632 35.633 mp2_ri_gpw_compute_in 1 5.0 0.052 0.052 18.634 19.300 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 17.361 18.031 mp2_ri_gpw_compute_en 1 5.0 0.191 0.206 16.905 17.314 mp2_ri_gpw_compute_en_RI_loop 1 6.0 1.025 1.119 15.511 15.514 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 14.896 15.068 integrate_v_rspace 93 8.1 0.118 0.135 14.736 14.912 grid_integrate_task_list 93 9.1 14.419 14.606 14.419 14.606 mp2_ri_gpw_compute_en_expansio 65 7.0 0.121 0.146 11.579 11.689 local_gemm 65 8.0 11.458 11.586 11.458 11.586 mp2_ri_gpw_compute_en_comm 17 7.0 0.075 0.093 2.489 2.993 dbcsr_multiply_generic 176 8.0 0.009 0.010 2.134 2.793 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.117 2.773 mp_sendrecv_dm3 1054 8.0 1.891 2.492 1.891 2.492 scf_env_do_scf 1 3.0 0.000 0.000 2.035 2.036 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.035 2.036 make_m2s 352 9.0 0.003 0.004 0.901 1.380 make_images 352 10.0 0.056 0.060 0.888 1.366 multiply_cannon 176 9.0 0.017 0.019 1.191 1.364 multiply_cannon_loop 176 10.0 0.002 0.003 1.129 1.298 get_2c_integrals 1 6.0 0.000 0.001 1.204 1.235 mp_min_d 2 7.0 0.693 1.136 0.693 1.136 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 0.670 1.078 multiply_cannon_multrec 246 11.0 0.951 1.046 0.957 1.054 qs_scf_new_mos 10 5.0 0.000 0.000 1.037 1.041 eigensolver 11 5.8 0.001 0.001 1.023 1.024 compute_2c_integrals 1 7.0 0.003 0.004 0.889 0.906 cp_fm_diag_elpa 11 6.8 0.000 0.000 0.855 0.857 hybrid_alltoall_any 458 11.4 0.475 0.831 0.490 0.851 cp_fm_redistribute_end 11 7.8 0.320 0.846 0.332 0.849 make_images_data 352 11.0 0.004 0.005 0.485 0.843 cp_fm_diag_elpa_base 11 7.8 0.502 0.807 0.513 0.824 compute_2c_integrals_loop_lm 1 8.0 0.002 0.004 0.792 0.819 mp2_eri_2c_integrate_gpw 1 9.0 0.211 0.218 0.790 0.817 pw_transfer 2120 10.5 0.047 0.049 0.798 0.813 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=58.12700000000001, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=115.905, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=21.693, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.905, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=8.118, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.956, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=8.25, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=14.419, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=11.458, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=1.025, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.951, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.891, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.079 0.079 126.970 126.970 qs_energies 1 2.0 0.000 0.000 125.605 125.605 scf_env_do_scf 1 3.0 0.000 0.000 118.928 118.928 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 118.928 118.928 qs_ks_update_qs_env 15 5.0 0.000 0.000 49.557 49.557 rebuild_ks_matrix 15 6.0 0.000 0.000 49.347 49.347 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 49.347 49.347 qs_scf_new_mos 15 5.0 0.000 0.000 43.709 43.709 eigensolver 15 6.0 0.001 0.001 35.825 35.825 qs_vxc_create 15 8.0 0.037 0.037 33.652 33.652 calculate_dispersion_nonloc 15 9.0 6.895 6.895 29.464 29.464 cp_fm_diag_elpa 15 7.0 0.000 0.000 23.040 23.040 cp_fm_diag_elpa_base 15 8.0 20.545 20.545 23.040 23.040 pw_transfer 1191 10.0 0.059 0.059 22.862 22.862 qs_rho_update_rho_low 16 5.0 0.000 0.000 22.813 22.813 calculate_rho_elec 16 6.0 0.230 0.230 22.813 22.813 fft_wrap_pw1pw2 1086 11.0 0.008 0.008 22.683 22.683 grid_collocate_task_list 16 7.0 21.287 21.287 21.287 21.287 fft_wrap_pw1pw2_150 765 12.0 3.552 3.552 16.503 16.503 sum_up_and_integrate 15 8.0 0.094 0.094 14.572 14.572 integrate_v_rspace 15 9.0 0.020 0.020 14.478 14.478 grid_integrate_task_list 15 10.0 13.953 13.953 13.953 13.953 cp_fm_cholesky_restore 45 7.0 10.557 10.557 10.557 10.557 fft3d_s 1087 13.0 10.346 10.346 10.354 10.354 pw_scatter_s 585 13.1 6.898 6.898 6.898 6.898 fft_wrap_pw1pw2_200 197 12.3 0.694 0.694 5.952 5.952 copy_dbcsr_to_fm 16 5.9 0.001 0.001 5.723 5.723 dbcsr_complete_redistribute 46 8.3 2.308 2.308 5.648 5.648 cp_fm_upper_to_full 30 8.0 4.721 4.721 4.721 4.721 vdW_energy 15 10.0 4.313 4.313 4.313 4.313 xc_vxc_pw_create 15 9.0 0.211 0.211 4.151 4.151 gspace_mixing 14 5.0 0.170 0.170 4.105 4.105 broyden_mixing 14 6.0 3.507 3.507 3.507 3.507 init_scf_run 1 3.0 0.000 0.000 3.166 3.166 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.052 3.052 xc_pw_derive 90 11.0 0.001 0.001 2.726 2.726 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.021 62.546 62.557 qs_energies 1 2.0 0.000 0.000 62.273 62.284 scf_env_do_scf 1 3.0 0.000 0.001 58.226 58.227 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 58.226 58.227 qs_ks_update_qs_env 15 5.0 0.000 0.000 23.825 23.877 rebuild_ks_matrix 15 6.0 0.000 0.000 23.791 23.843 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 23.791 23.843 qs_rho_update_rho_low 16 5.0 0.000 0.000 21.692 21.695 calculate_rho_elec 16 6.0 0.007 0.007 21.692 21.695 grid_collocate_task_list 16 7.0 19.540 20.156 19.540 20.156 sum_up_and_integrate 15 8.0 0.012 0.016 14.354 14.447 integrate_v_rspace 15 9.0 0.001 0.001 14.342 14.440 qs_scf_new_mos 15 5.0 0.000 0.000 13.378 13.641 grid_integrate_task_list 15 10.0 13.230 13.564 13.230 13.564 eigensolver 15 6.0 0.001 0.002 12.370 12.388 qs_vxc_create 15 8.0 0.001 0.001 9.146 9.156 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.892 8.893 cp_fm_diag_elpa_base 15 8.0 8.747 8.775 8.889 8.889 calculate_dispersion_nonloc 15 9.0 0.911 0.927 7.343 7.359 pw_transfer 1191 10.0 0.096 0.104 6.847 6.913 fft_wrap_pw1pw2 1086 11.0 0.013 0.014 6.658 6.744 fft3d_ps 1086 13.0 2.610 2.759 4.936 5.124 fft_wrap_pw1pw2_150 765 12.0 0.308 0.327 4.299 4.324 cp_fm_cholesky_restore 45 7.0 3.330 3.376 3.330 3.376 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.446 2.446 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.156 2.347 fft_wrap_pw1pw2_200 197 12.3 0.193 0.215 2.249 2.341 rs_pw_transfer 158 9.4 0.001 0.002 2.053 2.324 density_rs2pw 16 7.0 0.001 0.001 2.033 2.237 mp_waitany 520 11.3 1.360 2.006 1.360 2.006 mp_alltoall_z22v 1086 15.0 1.714 1.962 1.714 1.962 xc_vxc_pw_create 15 9.0 0.015 0.020 1.802 1.827 rs_pw_transfer_RS2PW_200 18 8.8 0.025 0.029 0.928 1.712 build_core_ppnl 1 5.0 1.264 1.382 1.264 1.382 init_scf_run 1 3.0 0.000 0.001 1.376 1.377 vdW_energy 15 10.0 1.312 1.376 1.312 1.376 x_to_yz 585 14.1 0.338 0.359 1.225 1.335 mp_waitall_1 4038 11.6 0.638 1.315 0.638 1.315 scf_env_initial_rho_setup 1 4.0 0.000 0.000 1.288 1.288 xc_pw_derive 90 11.0 0.001 0.002 1.198 1.280 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=50.282, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.287, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=20.545, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.953, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=10.557, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.346, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=15.089000000000006, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.54, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.747, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.23, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.33, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.61, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.077 0.077 353.192 353.192 qs_energies 1 2.0 0.000 0.000 353.055 353.055 ls_scf 1 3.0 0.000 0.000 351.763 351.763 ls_scf_main 1 4.0 0.002 0.002 342.485 342.485 density_matrix_trs4 11 5.0 0.013 0.013 250.413 250.413 arnoldi_extremal 12 6.1 0.000 0.000 161.226 161.226 arnoldi_normal_ev 12 7.1 0.015 0.015 161.226 161.226 dbcsr_matrix_vector_mult 652 9.0 0.179 0.179 158.668 158.668 build_subspace 23 8.1 0.077 0.077 158.530 158.530 dbcsr_matrix_vector_mult_local 652 10.0 157.124 157.124 157.134 157.134 ls_scf_dm_to_ks 11 5.0 0.000 0.000 86.748 86.748 matrix_ls_to_qs 11 6.0 0.000 0.000 83.424 83.424 dbcsr_multiply_generic 185 6.1 0.814 0.814 78.317 78.317 multiply_cannon 185 7.1 0.305 0.305 46.650 46.650 dbcsr_copy_into_existing 11 7.0 44.063 44.063 44.063 44.063 dbcsr_complete_redistribute 23 7.5 31.496 31.496 43.110 43.110 matrix_decluster 11 7.0 0.000 0.000 39.360 39.360 multiply_cannon_loop 185 8.1 0.219 0.219 34.018 34.018 make_m2s 370 7.1 0.038 0.038 26.572 26.572 make_images 370 8.1 10.862 10.862 24.818 24.818 multiply_cannon_multrec 185 9.1 24.664 24.664 24.792 24.792 dbcsr_finalize 646 7.5 0.196 0.196 15.489 15.489 dbcsr_merge_all 597 8.5 2.366 2.366 14.298 14.298 setup_rec_index_2d 370 8.1 12.207 12.207 12.207 12.207 dbcsr_sort_indices 1103 9.9 11.569 11.569 11.569 11.569 tree_to_linear_d 110 9.4 10.527 10.527 10.527 10.527 quick_finalize 395 10.0 0.406 0.406 10.032 10.032 dbcsr_special_finalize 370 9.1 0.002 0.002 9.284 9.284 calculate_norms 370 9.1 9.007 9.007 9.007 9.007 ls_scf_init_scf 1 4.0 0.000 0.000 8.557 8.557 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.204 8.204 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.521 7.521 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.024 71.517 71.527 qs_energies 1 2.0 0.000 0.000 71.414 71.415 ls_scf 1 3.0 0.000 0.000 71.361 71.362 ls_scf_main 1 4.0 0.001 0.009 68.550 68.551 density_matrix_trs4 11 5.0 0.007 0.018 65.955 66.007 dbcsr_multiply_generic 185 6.1 0.063 0.082 62.431 62.775 multiply_cannon 185 7.1 0.037 0.042 52.366 52.863 multiply_cannon_loop 185 8.1 0.136 0.154 49.681 50.764 multiply_cannon_multrec 1480 9.1 30.933 33.652 31.254 33.989 mp_waitall_1 11936 10.3 15.244 18.192 15.244 18.192 multiply_cannon_metrocomm3 1480 9.1 0.015 0.017 8.952 14.020 make_m2s 370 7.1 0.036 0.039 7.118 7.166 make_images 370 8.1 0.647 0.686 6.985 7.034 calculate_norms 2960 9.1 5.811 6.793 5.811 6.793 multiply_cannon_metrocomm1 1480 9.1 0.009 0.013 3.465 5.608 make_images_data 370 9.1 0.010 0.012 3.104 3.281 hybrid_alltoall_any 393 9.9 0.214 1.180 2.729 2.947 mp_sum_l 1119 5.6 1.715 2.839 1.715 2.839 arnoldi_extremal 12 6.1 0.000 0.000 2.654 2.673 arnoldi_normal_ev 12 7.1 0.001 0.004 2.653 2.673 build_subspace 23 8.1 0.023 0.028 2.556 2.559 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.243 2.325 dbcsr_matrix_vector_mult 652 9.0 0.010 0.048 2.125 2.216 ls_scf_init_scf 1 4.0 0.000 0.000 2.168 2.169 dbcsr_complete_redistribute 23 7.5 1.258 1.330 2.047 2.162 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.142 2.148 matrix_ls_to_qs 11 6.0 0.000 0.000 1.992 2.111 matrix_decluster 11 7.0 0.000 0.000 1.846 1.966 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.953 1.956 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 0.890 1.918 dbcsr_matrix_vector_mult_local 652 10.0 1.765 1.913 1.767 1.915 make_images_pack 370 9.1 1.782 1.885 1.786 1.890 dbcsr_finalize 646 7.5 0.008 0.009 1.605 1.811 buffer_matrices_ensure_size 370 8.1 1.577 1.679 1.577 1.679 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=74.63099999999997, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=157.124, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=44.063, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=31.496, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=24.664, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=12.207, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.007, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=14.72399999999999, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.765, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.258, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=30.933, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.811, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.782, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=15.244, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.008 71.705 71.705 lib_test 1 2.0 0.000 0.000 71.696 71.696 dbcsr_run_tests 3 3.0 0.002 0.002 71.696 71.696 test_multiplies_multiproc 3 4.0 0.001 0.001 55.559 55.559 dbcsr_redistribute 9 5.0 35.141 35.141 36.764 36.764 dbcsr_multiply_generic 9 5.0 0.002 0.002 17.447 17.447 dbcsr_make_random_matrix 9 4.0 13.033 13.033 16.028 16.028 multiply_cannon 9 6.0 0.002 0.002 12.507 12.507 multiply_cannon_loop 9 7.0 0.026 0.026 12.138 12.138 multiply_cannon_multrec 9 8.0 12.111 12.111 12.112 12.112 dbcsr_finalize 27 5.7 0.021 0.021 5.673 5.673 dbcsr_merge_all 18 6.5 2.111 2.111 4.921 4.921 dbcsr_data_release 975 7.6 2.798 2.798 2.798 2.798 tree_to_linear_d 9 7.0 1.952 1.952 1.952 1.952 make_m2s 18 6.0 0.001 0.001 1.662 1.662 make_images 18 7.0 0.568 0.568 1.606 1.606 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.016 19.641 19.645 lib_test 1 2.0 0.000 0.000 19.387 19.407 dbcsr_run_tests 3 3.0 0.000 0.001 19.386 19.405 test_multiplies_multiproc 3 4.0 0.000 0.003 18.483 18.522 dbcsr_multiply_generic 9 5.0 0.001 0.013 17.070 17.194 multiply_cannon 9 6.0 0.002 0.002 15.179 15.524 multiply_cannon_loop 9 7.0 0.002 0.002 14.874 15.189 multiply_cannon_multrec 72 8.0 12.606 12.935 12.606 12.936 mp_waitall_1 576 9.2 2.586 3.174 2.586 3.174 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 2.040 2.614 dbcsr_data_release 444 7.6 0.801 0.903 0.801 0.903 dbcsr_make_random_matrix 9 4.0 0.708 0.721 0.870 0.897 mp_sum_l 390 2.5 0.414 0.870 0.414 0.870 dbcsr_finalize 27 5.7 0.000 0.000 0.762 0.856 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.395 0.840 dbcsr_destroy 111 5.9 0.000 0.001 0.645 0.766 make_m2s 18 6.0 0.001 0.001 0.718 0.743 make_images 18 7.0 0.022 0.023 0.714 0.740 dbcsr_merge_all 18 6.5 0.110 0.127 0.605 0.696 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.220 0.589 dbcsr_redistribute 9 5.0 0.262 0.296 0.437 0.462 dbcsr_data_copy_aa2 18 7.5 0.363 0.433 0.363 0.433 make_images_data 18 8.0 0.001 0.001 0.360 0.415 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.510999999999996, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=35.141, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=13.033, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.111, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.798, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.111, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.1539999999999964, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.262, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.708, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.606, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.801, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.11, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.586, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.414, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.046 0.046 131.958 131.958 qs_mol_dyn_low 1 2.0 0.003 0.003 130.557 130.557 velocity_verlet 5 3.0 0.003 0.003 106.566 106.566 qmmm_el_coupling 6 3.8 0.000 0.000 87.264 87.264 qmmm_elec_with_gaussian 6 4.8 0.013 0.013 87.260 87.260 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 86.624 86.624 qmmm_elec_gaussian_low_G 6 6.8 85.734 85.734 85.734 85.734 qs_forces 6 3.8 0.000 0.000 34.811 34.811 qs_energies 6 4.8 0.000 0.000 30.910 30.910 scf_env_do_scf 6 5.8 0.001 0.001 28.659 28.659 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 24.913 24.913 rebuild_ks_matrix 45 8.4 0.000 0.000 23.911 23.911 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 23.910 23.910 qs_ks_update_qs_env 45 7.8 0.000 0.000 20.466 20.466 pw_transfer 966 12.3 0.053 0.053 17.179 17.179 fft_wrap_pw1pw2 801 13.6 0.006 0.006 16.984 16.984 fft_wrap_pw1pw2_150 507 15.2 2.237 2.237 16.575 16.575 qs_vxc_create 45 10.4 0.001 0.001 13.180 13.180 xc_vxc_pw_create 45 11.4 0.682 0.682 13.180 13.180 xc_pw_derive 270 13.4 0.002 0.002 9.264 9.264 fft3d_s 802 15.6 7.877 7.877 7.885 7.885 qs_rho_update_rho_low 45 7.9 0.000 0.000 7.313 7.313 calculate_rho_elec 45 8.9 0.563 0.563 7.313 7.313 xc_rho_set_and_dset_create 45 12.4 0.560 0.560 6.707 6.707 xc_pw_divergence 45 12.4 0.001 0.001 5.739 5.739 pw_scatter_s 429 15.8 5.595 5.595 5.595 5.595 qmmm_forces 6 3.8 0.001 0.001 5.394 5.394 qmmm_forces_with_gaussian 6 4.8 0.018 0.018 5.058 5.058 pw_integral_ab 2539 7.4 4.449 4.449 4.449 4.449 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.290 4.290 qs_ks_ddapc 45 10.4 0.001 0.001 4.153 4.153 init_scf_loop 6 6.8 0.000 0.000 3.741 3.741 qmmm_forces_gaussian_low_G 6 6.8 3.510 3.510 3.510 3.510 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.452 3.452 sum_up_and_integrate 45 10.4 0.339 0.339 3.448 3.448 grid_collocate_task_list 45 9.9 3.398 3.398 3.398 3.398 density_rs2pw 45 9.9 0.002 0.002 3.352 3.352 integrate_v_rspace 45 11.4 0.006 0.006 3.109 3.109 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.043 0.059 58.961 58.976 qs_mol_dyn_low 1 2.0 0.003 0.004 57.872 57.929 qs_forces 6 3.8 0.001 0.001 41.066 41.066 qs_energies 6 4.8 0.000 0.000 39.209 39.210 scf_env_do_scf 6 5.8 0.000 0.001 38.051 38.051 scf_env_do_scf_inner_loop 113 6.2 0.003 0.017 36.418 36.420 rebuild_ks_matrix 119 8.1 0.000 0.000 26.971 26.983 qs_ks_build_kohn_sham_matrix 119 9.1 0.016 0.017 26.971 26.983 qs_ks_update_qs_env 119 7.3 0.001 0.001 25.413 25.424 velocity_verlet 5 3.0 0.002 0.004 25.286 25.290 pw_transfer 2446 12.3 0.208 0.234 18.000 18.442 fft_wrap_pw1pw2 2059 13.4 0.025 0.027 17.524 17.944 fft_wrap_pw1pw2_150 1321 14.9 1.463 1.594 16.883 17.317 qs_vxc_create 119 10.1 0.002 0.003 14.233 14.237 xc_vxc_pw_create 119 11.1 0.158 0.217 14.231 14.235 fft3d_ps 2059 15.4 7.461 8.225 13.042 13.569 xc_pw_derive 714 13.1 0.009 0.011 10.731 11.053 qs_rho_update_rho_low 119 7.3 0.000 0.001 11.001 11.003 calculate_rho_elec 119 8.3 0.049 0.055 11.001 11.002 sum_up_and_integrate 119 10.1 0.091 0.106 9.364 9.380 integrate_v_rspace 119 11.1 0.003 0.003 9.273 9.299 qmmm_forces 6 3.8 0.002 0.002 8.800 8.801 qmmm_forces_with_gaussian 6 4.8 0.009 0.012 7.656 8.695 xc_pw_divergence 119 12.1 0.005 0.005 6.998 7.234 qmmm_el_coupling 6 3.8 0.000 0.000 7.146 7.221 qmmm_elec_with_gaussian 6 4.8 0.003 0.004 7.144 7.219 xc_rho_set_and_dset_create 119 12.1 0.377 0.459 6.871 7.038 rs_pw_transfer 988 11.5 0.011 0.012 6.486 6.818 density_rs2pw 119 9.3 0.006 0.008 6.143 6.480 potential_pw2rs 119 12.1 0.007 0.008 5.381 5.398 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.154 5.167 grid_collocate_task_list 119 9.3 4.710 5.045 4.710 5.045 mp_alltoall_z22v 2059 17.4 4.036 4.454 4.036 4.454 qmmm_forces_gaussian_low_G 6 6.8 3.420 4.444 3.420 4.444 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.589 4.077 grid_integrate_task_list 119 12.1 3.624 3.829 3.624 3.829 qmmm_elec_gaussian_low_G 6 6.8 2.986 3.474 2.986 3.474 x_to_yz 1095 16.8 0.850 0.954 3.006 3.310 pw_restrict_s3 18 5.8 1.526 1.570 2.892 2.992 yz_to_x 964 16.0 0.636 0.837 2.515 2.806 mp_waitany 4028 12.8 2.343 2.719 2.343 2.719 rs_pw_transfer_PW2RS_150 125 13.9 0.827 0.878 2.447 2.480 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.394 2.469 pw_prolongate_s3 18 6.8 1.264 1.285 2.394 2.468 rs_pw_transfer_RS2PW_150 125 11.2 0.592 0.738 1.875 2.224 mp_waitall_1 188862 16.2 1.862 2.089 1.862 2.089 qs_scf_new_mos 113 7.2 0.000 0.000 2.022 2.029 qs_scf_loop_do_ot 113 8.2 0.000 0.000 2.022 2.028 qs_ks_ddapc 119 10.1 0.002 0.002 1.973 2.028 ot_scf_mini 113 9.2 0.001 0.001 1.935 1.943 dbcsr_multiply_generic 2588 12.3 0.058 0.060 1.876 1.921 pw_gather_p 964 15.0 1.386 1.821 1.386 1.821 mp_sum_dm3 33 5.7 1.648 1.706 1.648 1.706 pw_integral_ab 2761 7.7 1.378 1.456 1.593 1.672 init_scf_loop 6 6.8 0.000 0.000 1.631 1.638 pw_scatter_p 1095 15.8 1.575 1.634 1.575 1.634 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.568 1.569 mp_sum_dm 514 5.2 1.164 1.393 1.164 1.393 ot_mini 113 10.2 0.000 0.001 1.199 1.207 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=21.39500000000001, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=85.734, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=7.877, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.595, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.449, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.51, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.398, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=31.345999999999997, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.986, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.378, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.42, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.71, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.624, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.461, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.036, yerr=0.0 Summary: Performance test took 37 minutes. Status: OK Removing intermediate container 49dba035c53d ---> c1668481aef6 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 4ffe8e999bbe Removing intermediate container 4ffe8e999bbe ---> 8c02e907600f Step 42/42 : ENTRYPOINT [] ---> Running in cbc3453859d7 Removing intermediate container cbc3453859d7 ---> 5ec0b67050af [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 5ec0b67050af Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-11-28 19:54:03+00:00