StartDate: 2022-03-21 19:18:27+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: 349cba18bddc2f8d256cd43eb2a7f103d6a8f7a2 CommitTime: 2022-03-21 16:02:39 +0100 CommitAuthor: Hans Pabst CommitSubject: Offload: account for OpenCL (#2016) Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=349cba18bddc2f8d256cd43eb2a7f103d6a8f7a2 Sending build context to Docker daemon 362.4MB Step 1/41 : FROM ubuntu:20.04 20.04: Pulling from library/ubuntu 4d32b49e2995: Already exists Digest: sha256:bea6d19168bbfd6af8d77c2cc3c572114eb5d113e6f422573c93cb605a0e2ffb Status: Downloaded newer image for ubuntu:20.04 ---> ff0fea8310f3 Step 2/41 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> d0669c0aabd9 Step 3/41 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> cb090919180c Step 4/41 : RUN ./install_requirements.sh ubuntu:20.04 ---> Using cache ---> bd73f8302b91 Step 5/41 : RUN mkdir scripts ---> Using cache ---> 53b24070f3b0 Step 6/41 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> a1d083987197 Step 7/41 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 5c1431a6ac5f Step 8/41 : RUN ./install_cp2k_toolchain.sh --install-all --mpi-mode=mpich --dry-run ---> Using cache ---> adc258e7623d Step 9/41 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> eb018c5f0c83 Step 10/41 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> 3ebc073d9e34 Step 11/41 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 8b05620a388b Step 12/41 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 6c62fbd42cc7 Step 13/41 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> dd606d77cb5b Step 14/41 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 015ba6923c7e Step 15/41 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> bb17589f654b Step 16/41 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 642e501510d9 Step 17/41 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 50e6517f2722 Step 18/41 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> ce2e1343c5a2 Step 19/41 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 5a0b503b0b57 Step 20/41 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 6386d3fd5854 Step 21/41 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> 1a6b2e4f41df Step 22/41 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> d2f8fa973633 Step 23/41 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 8668944c9b70 Step 24/41 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 93405018154a Step 25/41 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> b5ec6ba4809b Step 26/41 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> fadb5e3ed52d Step 27/41 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> f2ddf20e3b4b Step 28/41 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 38ed668a384f Step 29/41 : WORKDIR /opt/cp2k ---> Using cache ---> a8a423560a0f Step 30/41 : COPY ./Makefile . ---> 81a5ee7a6474 Step 31/41 : COPY ./src ./src ---> be82c3b066d3 Step 32/41 : COPY ./exts ./exts ---> 84ca1310bcad Step 33/41 : COPY ./tools/build_utils ./tools/build_utils ---> 5b03914632f2 Step 34/41 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true )" ---> Running in ddb3f90108e1 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container ddb3f90108e1 ---> 358276940604 Step 35/41 : COPY ./data ./data ---> d99bc819ea0c Step 36/41 : COPY ./tests ./tests ---> 74525f3fce56 Step 37/41 : COPY ./tools/regtesting ./tools/regtesting ---> 8dafa3e2bfea Step 38/41 : COPY ./benchmarks ./benchmarks ---> 944ca810146b Step 39/41 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 5c7418ed7a5a Step 40/41 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in f2fafcae0a7a ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 60 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 174.778 174.778 qs_mol_dyn_low 1 2.0 0.004 0.004 173.901 173.901 qs_forces 11 3.9 0.002 0.002 173.837 173.837 qs_energies 11 4.9 0.001 0.001 162.925 162.925 scf_env_do_scf 11 5.9 0.001 0.001 127.854 127.854 velocity_verlet 10 3.0 0.002 0.002 123.472 123.472 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 86.008 86.008 init_scf_loop 11 6.9 0.000 0.000 41.628 41.628 prepare_preconditioner 11 7.9 0.000 0.000 37.559 37.559 make_preconditioner 11 8.9 0.000 0.000 37.559 37.559 make_full_inverse_cholesky 11 9.9 0.000 0.000 35.474 35.474 rebuild_ks_matrix 119 8.3 0.001 0.001 33.770 33.770 qs_ks_build_kohn_sham_matrix 119 9.3 0.021 0.021 33.769 33.769 qs_ks_update_qs_env 119 7.6 0.001 0.001 31.567 31.567 qs_rho_update_rho 119 7.7 0.001 0.001 30.758 30.758 calculate_rho_elec 119 8.7 1.578 1.578 30.757 30.757 qs_scf_new_mos 108 7.5 0.001 0.001 30.022 30.022 qs_scf_loop_do_ot 108 8.5 0.001 0.001 30.021 30.021 ot_scf_mini 108 9.5 0.004 0.004 27.928 27.928 dbcsr_multiply_generic 2286 12.5 0.206 0.206 25.999 25.999 grid_collocate_task_list 119 9.7 24.231 24.231 24.231 24.231 cp_fm_cholesky_invert 11 10.9 20.921 20.921 20.921 20.921 sum_up_and_integrate 119 10.3 0.415 0.415 20.881 20.881 integrate_v_rspace 119 11.3 0.579 0.579 20.466 20.466 init_scf_run 11 5.9 0.001 0.001 17.874 17.874 scf_env_initial_rho_setup 11 6.9 0.001 0.001 17.873 17.873 grid_integrate_task_list 119 12.3 17.165 17.165 17.165 17.165 wfi_extrapolate 11 7.9 0.001 0.001 17.007 17.007 ot_mini 108 10.5 0.001 0.001 16.692 16.692 cp_gemm 81 9.0 0.000 0.000 16.501 16.501 cp_gemm_cosma 81 10.0 16.501 16.501 16.501 16.501 make_m2s 4572 13.5 0.073 0.073 14.762 14.762 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.745 11.745 qs_ot_get_derivative 108 11.5 0.002 0.002 8.523 8.523 pw_transfer 1439 11.6 0.104 0.104 8.516 8.516 fft_wrap_pw1pw2 1201 12.6 0.011 0.011 8.164 8.164 ot_diis_step 108 11.5 0.006 0.006 8.164 8.164 cp_fm_cholesky_decompose 22 10.9 8.002 8.002 8.002 8.002 make_images 4572 14.5 2.856 2.856 7.844 7.844 dbcsr_make_dense_low 5837 15.5 0.108 0.108 7.096 7.096 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 7.006 7.006 apply_single 119 13.6 0.001 0.001 7.005 7.005 make_dense_data 5837 16.5 6.255 6.255 6.963 6.963 fft_wrap_pw1pw2_140 487 13.2 0.665 0.665 6.920 6.920 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.816 6.816 qs_ot_get_p 119 10.4 0.001 0.001 6.794 6.794 dbcsr_complete_redistribute 329 12.2 3.194 3.194 6.629 6.629 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.430 6.430 dbcsr_make_images_dense 3978 14.8 0.030 0.030 6.415 6.415 multiply_cannon 2286 13.5 1.060 1.060 6.113 6.113 dbcsr_copy 2102 12.0 0.310 0.310 6.055 6.055 qs_create_task_list 11 7.9 0.000 0.000 5.837 5.837 generate_qs_task_list 11 8.9 3.974 3.974 5.837 5.837 dbcsr_copy_into_existing 22 7.9 5.693 5.693 5.694 5.694 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.467 5.467 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 5.277 5.277 calculate_w_matrix_ot 11 6.9 0.009 0.009 5.276 5.276 qs_ot_p2m_diag 50 11.0 0.212 0.212 5.117 5.117 density_rs2pw 119 9.7 0.007 0.007 4.948 4.948 pw_poisson_solve 119 10.3 1.998 1.998 4.795 4.795 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.617 4.617 cp_dbcsr_syevd 50 12.0 0.004 0.004 4.549 4.549 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.514 4.514 multiply_cannon_loop 2286 14.5 0.055 0.055 4.423 4.423 cp_fm_diag_elpa 50 13.0 0.001 0.001 4.382 4.382 cp_fm_diag_elpa_base 50 14.0 4.324 4.324 4.381 4.381 multiply_cannon_multrec 2286 15.5 4.267 4.267 4.366 4.366 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.093 4.093 fft3d_s 1202 14.6 3.759 3.759 3.766 3.766 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.014 76.936 76.937 qs_mol_dyn_low 1 2.0 0.007 0.009 76.792 76.798 qs_forces 11 3.9 0.002 0.002 76.732 76.733 qs_energies 11 4.9 0.001 0.002 71.635 71.637 scf_env_do_scf 11 5.9 0.001 0.001 64.594 64.595 scf_env_do_scf_inner_loop 108 6.5 0.003 0.010 59.893 59.893 velocity_verlet 10 3.0 0.002 0.002 45.963 45.965 rebuild_ks_matrix 119 8.3 0.001 0.001 29.847 29.882 qs_ks_build_kohn_sham_matrix 119 9.3 0.024 0.025 29.846 29.882 qs_ks_update_qs_env 119 7.6 0.001 0.002 26.573 26.605 qs_rho_update_rho 119 7.7 0.001 0.001 23.090 23.102 calculate_rho_elec 119 8.7 0.048 0.050 23.089 23.101 sum_up_and_integrate 119 10.3 0.051 0.056 22.947 23.006 integrate_v_rspace 119 11.3 0.005 0.006 22.896 22.957 dbcsr_multiply_generic 2286 12.5 0.138 0.141 18.799 19.106 grid_collocate_task_list 119 9.7 15.852 16.759 15.852 16.759 grid_integrate_task_list 119 12.3 15.629 16.458 15.629 16.458 qs_scf_new_mos 108 7.5 0.001 0.001 15.056 15.087 qs_scf_loop_do_ot 108 8.5 0.001 0.002 15.055 15.086 ot_scf_mini 108 9.5 0.004 0.004 14.144 14.176 multiply_cannon 2286 13.5 0.241 0.250 12.674 12.966 multiply_cannon_loop 2286 14.5 0.234 0.248 11.550 11.932 mp_waitall_1 169478 16.3 9.971 10.270 9.971 10.270 rs_pw_transfer 974 11.9 0.018 0.019 7.652 8.592 ot_mini 108 10.5 0.001 0.002 8.338 8.371 density_rs2pw 119 9.7 0.010 0.011 6.544 7.486 multiply_cannon_metrocomm3 18288 15.5 0.086 0.091 6.530 6.948 pw_transfer 1439 11.6 0.142 0.149 6.593 6.666 fft_wrap_pw1pw2 1201 12.6 0.015 0.016 6.273 6.347 potential_pw2rs 119 12.3 0.010 0.011 5.880 5.889 fft_wrap_pw1pw2_140 487 13.2 0.636 0.654 5.464 5.646 init_scf_run 11 5.9 0.000 0.002 4.864 4.865 scf_env_initial_rho_setup 11 6.9 0.000 0.001 4.864 4.864 fft3d_ps 1201 14.6 2.569 2.738 4.696 4.773 init_scf_loop 11 6.9 0.001 0.001 4.681 4.682 wfi_extrapolate 11 7.9 0.001 0.001 4.440 4.440 ot_diis_step 108 11.5 0.005 0.005 4.386 4.386 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.330 4.382 apply_single 119 13.6 0.001 0.001 4.330 4.381 make_m2s 4572 13.5 0.078 0.083 4.277 4.344 qs_ot_get_derivative 108 11.5 0.001 0.002 3.920 3.951 multiply_cannon_multrec 18288 15.5 3.696 3.878 3.715 3.897 make_images 4572 14.5 0.194 0.201 3.549 3.622 mp_waitany 9880 13.7 2.604 3.554 2.604 3.554 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.518 3.522 rs_pw_transfer_RS2PW_140 130 11.5 0.650 0.709 2.387 3.339 rs_pw_transfer_PW2RS_140 130 13.9 1.442 1.498 2.971 3.013 mp_alltoall_d11v 2130 13.8 1.760 2.415 1.760 2.415 rs_gather_matrices 119 12.3 0.147 0.162 1.324 1.978 qs_ot_get_p 119 10.4 0.001 0.001 1.865 1.927 make_images_data 4572 15.5 0.064 0.069 1.744 1.859 cp_gemm 81 9.0 0.000 0.000 1.660 1.665 cp_gemm_cosma 81 10.0 1.660 1.665 1.660 1.665 hybrid_alltoall_any 4725 16.4 0.132 0.472 1.546 1.664 mp_alltoall_z22v 1201 16.6 1.413 1.613 1.413 1.613 prepare_preconditioner 11 7.9 0.000 0.000 1.539 1.553 make_preconditioner 11 8.9 0.000 0.000 1.539 1.553 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.443 1.546 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=83.691, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=24.231, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=20.921, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=17.165, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=16.501, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=8.002, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.267, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=27.524000000000015, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.852, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.629, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.66, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.696, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.604, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=9.971, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.037 0.037 227.966 227.966 qs_mol_dyn_low 1 2.0 0.004 0.004 227.076 227.076 qs_forces 11 3.9 0.002 0.002 227.016 227.016 qs_energies 11 4.9 0.001 0.001 212.068 212.068 scf_env_do_scf 11 5.9 0.001 0.001 173.004 173.004 velocity_verlet 10 3.0 0.002 0.002 155.488 155.488 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 128.677 128.677 rebuild_ks_matrix 107 8.3 0.001 0.001 63.754 63.754 qs_ks_build_kohn_sham_matrix 107 9.3 0.019 0.019 63.753 63.753 qs_ks_update_qs_env 107 7.6 0.001 0.001 57.444 57.444 qs_rho_update_rho 107 7.7 0.001 0.001 56.741 56.741 calculate_rho_elec 107 8.7 1.418 1.418 56.740 56.740 sum_up_and_integrate 107 10.3 0.378 0.378 51.883 51.883 integrate_v_rspace 107 11.3 0.604 0.604 51.504 51.504 grid_collocate_task_list 107 9.7 50.780 50.780 50.780 50.780 grid_integrate_task_list 107 12.3 48.350 48.350 48.350 48.350 init_scf_loop 11 6.9 0.000 0.000 44.117 44.117 prepare_preconditioner 11 7.9 0.000 0.000 36.614 36.614 make_preconditioner 11 8.9 0.000 0.000 36.614 36.614 make_full_inverse_cholesky 11 9.9 0.000 0.000 34.474 34.474 qs_scf_new_mos 96 7.5 0.001 0.001 27.196 27.196 qs_scf_loop_do_ot 96 8.5 0.001 0.001 27.195 27.195 ot_scf_mini 96 9.5 0.003 0.003 25.207 25.207 dbcsr_multiply_generic 1966 12.4 0.181 0.181 23.910 23.910 init_scf_run 11 5.9 0.001 0.001 20.961 20.961 scf_env_initial_rho_setup 11 6.9 0.001 0.001 20.960 20.960 cp_fm_cholesky_invert 11 10.9 20.027 20.027 20.027 20.027 wfi_extrapolate 11 7.9 0.001 0.001 19.793 19.793 cp_gemm 81 9.0 0.000 0.000 16.469 16.469 cp_gemm_cosma 81 10.0 16.468 16.468 16.468 16.468 ot_mini 96 10.5 0.001 0.001 15.201 15.201 make_m2s 3932 13.4 0.063 0.063 13.634 13.634 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 12.663 12.663 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 8.066 8.066 pw_transfer 1295 11.6 0.095 0.095 7.883 7.883 cp_fm_cholesky_decompose 22 10.9 7.863 7.863 7.863 7.863 qs_ot_get_derivative 96 11.5 0.002 0.002 7.749 7.749 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.584 7.584 fft_wrap_pw1pw2 1081 12.6 0.010 0.010 7.551 7.551 ot_diis_step 96 11.5 0.005 0.005 7.447 7.447 make_images 3932 14.4 2.664 2.664 7.264 7.264 qs_create_task_list 11 7.9 0.000 0.000 7.003 7.003 generate_qs_task_list 11 8.9 5.135 5.135 7.003 7.003 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 6.879 6.879 dbcsr_complete_redistribute 317 12.2 3.162 3.162 6.628 6.628 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 6.584 6.584 apply_single 107 13.6 0.001 0.001 6.584 6.584 dbcsr_make_dense_low 4961 15.5 0.088 0.088 6.535 6.535 make_dense_data 4961 16.5 5.806 5.806 6.426 6.426 fft_wrap_pw1pw2_140 439 13.2 0.662 0.662 6.420 6.420 qs_ot_get_p 107 10.4 0.001 0.001 5.987 5.987 dbcsr_make_images_dense 3386 14.7 0.025 0.025 5.928 5.928 multiply_cannon 1966 13.4 1.029 1.029 5.615 5.615 dbcsr_copy 1855 11.9 0.283 0.283 5.465 5.465 copy_dbcsr_to_fm 147 11.2 0.004 0.004 5.445 5.445 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 5.260 5.260 calculate_w_matrix_ot 11 6.9 0.009 0.009 5.260 5.260 dbcsr_copy_into_existing 22 7.9 5.136 5.136 5.136 5.136 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.013 133.762 133.762 qs_mol_dyn_low 1 2.0 0.005 0.005 133.632 133.644 qs_forces 11 3.9 0.002 0.002 133.577 133.577 qs_energies 11 4.9 0.001 0.002 124.454 124.456 scf_env_do_scf 11 5.9 0.001 0.001 114.251 114.252 scf_env_do_scf_inner_loop 96 6.5 0.003 0.010 106.065 106.065 velocity_verlet 10 3.0 0.002 0.002 80.547 80.549 rebuild_ks_matrix 107 8.3 0.001 0.001 60.950 60.995 qs_ks_build_kohn_sham_matrix 107 9.3 0.023 0.023 60.949 60.994 sum_up_and_integrate 107 10.3 0.046 0.050 54.530 54.564 integrate_v_rspace 107 11.3 0.005 0.005 54.483 54.519 qs_ks_update_qs_env 107 7.6 0.001 0.001 53.649 53.685 qs_rho_update_rho 107 7.7 0.001 0.001 50.206 50.223 calculate_rho_elec 107 8.7 0.043 0.045 50.205 50.222 grid_integrate_task_list 107 12.3 46.933 48.025 46.933 48.025 grid_collocate_task_list 107 9.7 42.975 44.238 42.975 44.238 dbcsr_multiply_generic 1966 12.4 0.123 0.125 17.227 17.297 qs_scf_new_mos 96 7.5 0.001 0.001 13.503 13.555 qs_scf_loop_do_ot 96 8.5 0.001 0.001 13.502 13.554 ot_scf_mini 96 9.5 0.003 0.003 12.671 12.724 multiply_cannon 1966 13.4 0.208 0.212 11.710 11.878 multiply_cannon_loop 1966 14.4 0.210 0.220 10.692 11.071 mp_waitall_1 146670 16.2 9.215 9.468 9.215 9.468 rs_pw_transfer 878 11.9 0.017 0.018 7.829 9.063 init_scf_loop 11 6.9 0.000 0.001 8.166 8.166 init_scf_run 11 5.9 0.000 0.002 7.972 7.972 scf_env_initial_rho_setup 11 6.9 0.001 0.001 7.972 7.972 density_rs2pw 107 9.7 0.009 0.010 6.601 7.857 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.529 7.537 ot_mini 96 10.5 0.001 0.002 7.479 7.533 wfi_extrapolate 11 7.9 0.001 0.001 7.323 7.323 multiply_cannon_metrocomm3 15728 15.4 0.076 0.080 6.077 6.471 pw_transfer 1295 11.6 0.130 0.137 6.137 6.219 fft_wrap_pw1pw2 1081 12.6 0.014 0.015 5.843 5.923 potential_pw2rs 107 12.3 0.009 0.010 5.566 5.577 fft_wrap_pw1pw2_140 439 13.2 0.581 0.597 5.113 5.304 fft3d_ps 1081 14.6 2.439 2.587 4.382 4.459 mp_waitany 8968 13.7 3.151 4.358 3.151 4.358 apply_preconditioner_dbcsr 107 12.6 0.000 0.001 3.965 4.022 apply_single 107 13.6 0.001 0.001 3.965 4.021 rs_pw_transfer_RS2PW_140 118 11.5 0.492 0.529 2.774 4.003 ot_diis_step 96 11.5 0.004 0.005 3.945 3.946 make_m2s 3932 13.4 0.068 0.073 3.861 3.915 mp_alltoall_d11v 1998 13.7 2.324 3.706 2.324 3.706 multiply_cannon_multrec 15728 15.4 3.420 3.561 3.437 3.578 qs_ot_get_derivative 96 11.5 0.001 0.001 3.502 3.556 make_images 3932 14.4 0.171 0.175 3.212 3.267 rs_gather_matrices 107 12.3 0.142 0.155 1.919 3.246 rs_pw_transfer_PW2RS_140 118 13.9 1.418 1.465 2.944 2.985 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=84.47800000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=50.78, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=48.35, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=20.027, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=16.468, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.863, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=28.067999999999998, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=42.975, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=46.933, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=9.215, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.151, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.42, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.514 0.514 292.926 292.926 qs_energies 1 2.0 0.000 0.000 291.408 291.408 scf_env_do_scf 1 3.0 0.000 0.000 288.682 288.682 qs_ks_update_qs_env 8 5.0 0.000 0.000 270.773 270.773 rebuild_ks_matrix 7 6.0 0.000 0.000 270.658 270.658 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 270.658 270.658 hfx_ks_matrix 7 8.0 0.000 0.000 178.375 178.375 integrate_four_center 7 9.0 2.360 2.360 178.340 178.340 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 167.675 167.675 integrate_four_center_main 7 10.0 0.818 0.818 164.864 164.864 integrate_four_center_bin 455 11.0 164.047 164.047 164.047 164.047 init_scf_loop 1 4.0 0.000 0.000 120.987 120.987 cp_gemm 129 10.3 0.001 0.001 76.451 76.451 cp_gemm_cosma 129 11.3 76.450 76.450 76.450 76.450 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 44.219 44.219 admm_fit_mo_coeffs 7 9.0 0.000 0.000 42.337 42.337 admm_mo_merge_derivs 7 8.0 0.000 0.000 39.096 39.096 merge_mo_derivs_diag 7 9.0 0.024 0.024 39.096 39.096 purify_mo_diag 7 10.0 0.001 0.001 24.938 24.938 fit_mo_coeffs 7 10.0 0.000 0.000 17.399 17.399 prepare_preconditioner 1 5.0 0.000 0.000 13.758 13.758 make_preconditioner 1 6.0 0.000 0.000 13.758 13.758 integrate_four_center_load 7 10.0 0.000 0.000 10.707 10.707 hfx_load_balance 1 11.0 0.002 0.002 10.706 10.706 arnoldi_normal_ev 11 9.3 0.002 0.002 7.954 7.954 estimate_cond_num 1 7.0 0.000 0.000 7.871 7.871 build_subspace 28 9.5 0.017 0.017 7.821 7.821 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.218 0.224 200.342 200.344 qs_energies 1 2.0 0.000 0.000 199.951 199.951 scf_env_do_scf 1 3.0 0.000 0.001 199.240 199.241 qs_ks_update_qs_env 8 5.0 0.000 0.000 195.810 195.810 rebuild_ks_matrix 7 6.0 0.000 0.000 195.794 195.794 qs_ks_build_kohn_sham_matrix 7 7.0 0.003 0.004 195.794 195.794 hfx_ks_matrix 7 8.0 0.001 0.001 181.902 181.904 integrate_four_center 7 9.0 0.128 0.452 181.884 181.885 integrate_four_center_main 7 10.0 0.005 0.006 165.167 167.910 integrate_four_center_bin 448 11.0 165.162 167.905 165.162 167.905 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 111.833 111.833 init_scf_loop 1 4.0 0.000 0.000 87.405 87.405 integrate_four_center_load 7 10.0 0.000 0.000 11.685 11.694 hfx_load_balance 1 11.0 0.001 0.002 11.685 11.694 mp_sync 70 11.3 4.084 7.400 4.084 7.400 cp_gemm 129 10.3 0.001 0.001 5.991 5.997 cp_gemm_cosma 129 11.3 5.990 5.997 5.990 5.997 hfx_load_balance_bin 1 12.0 5.586 5.973 5.586 5.973 hfx_load_balance_count 1 12.0 5.441 5.695 5.441 5.695 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 4.522 4.530 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=48.73699999999994, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=164.047, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=76.45, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.36, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.818, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.514, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=13.728000000000009, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=165.162, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=5.99, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.128, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.218, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=5.586, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=4.084, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=5.441, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 421.230 421.230 qs_energies 1 2.0 0.000 0.000 420.645 420.645 mp2_main 1 3.0 0.000 0.000 414.057 414.057 mp2_gpw_main 1 4.0 0.000 0.000 413.677 413.677 rpa_ri_compute_en 1 5.0 0.000 0.000 399.337 399.337 rpa_num_int 1 6.0 0.001 0.001 399.313 399.313 cp_gemm 105 8.4 0.001 0.001 185.541 185.541 cp_gemm_cosma 105 9.4 185.541 185.541 185.541 185.541 compute_mat_P_omega 1 7.0 0.002 0.002 182.735 182.735 compute_mat_P_omega_contract 10 8.0 12.823 12.823 181.113 181.113 dbt_total 2336 9.6 0.024 0.024 173.694 173.694 GW_matrix_operations 10 7.0 0.006 0.006 129.958 129.958 dbt_contract 787 11.0 50.770 50.770 105.530 105.530 dbt_copy 1103 10.7 22.497 22.497 66.717 66.717 compute_mat_P_omega_calc_M_occ 250 9.0 12.839 12.839 65.314 65.314 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 63.247 63.247 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 61.034 61.034 dbt_tas_total 1149 12.2 0.270 0.270 51.680 51.680 dbt_tas_multiply 807 12.1 0.005 0.005 50.268 50.268 dbt_tas_dbm 807 14.1 0.006 0.006 40.231 40.231 dbm_multiply 807 16.1 40.217 40.217 40.217 40.217 compute_mat_P_omega_calc_M_vir 250 9.0 0.002 0.002 39.671 39.671 dbt_tas_mm_1N 524 15.1 0.002 0.002 24.096 24.096 compute_QP_energies 1 7.0 0.000 0.000 21.295 21.295 compute_self_energy_cubic_gw 1 8.0 0.108 0.108 21.295 21.295 dbt_tas_copy 574 11.4 17.703 17.703 21.076 21.076 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 19.408 19.408 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 15.390 15.390 dbt_tas_mm_2 251 15.0 0.002 0.002 14.339 14.339 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 14.323 14.323 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 13.742 13.742 dbt_copy_nocomm 251 12.0 12.045 12.045 13.617 13.617 dbt_tas_reserve_blocks_index 3261 13.7 7.874 7.874 12.899 12.899 dbt_reserve_blocks_index 2280 12.5 1.642 1.642 10.808 10.808 dbt_reserve_blocks_index_array 2222 11.6 0.013 0.013 10.771 10.771 cp_fm_cholesky_invert 10 8.0 9.716 9.716 9.716 9.716 contract_cubic_gw 21 9.0 0.000 0.000 9.087 9.087 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.010 53.886 53.887 qs_energies 1 2.0 0.001 0.001 53.759 53.760 mp2_main 1 3.0 0.000 0.000 52.224 52.224 mp2_gpw_main 1 4.0 0.000 0.001 52.164 52.165 rpa_ri_compute_en 1 5.0 0.000 0.000 50.265 50.266 rpa_num_int 1 6.0 0.001 0.001 50.257 50.258 dbt_total 2336 9.6 0.019 0.020 37.509 37.511 compute_mat_P_omega 1 7.0 0.001 0.002 36.304 36.311 compute_mat_P_omega_contract 10 8.0 0.677 0.709 35.973 35.980 dbt_contract 787 11.0 1.860 2.027 27.843 27.861 dbt_tas_total 1149 12.2 0.077 0.095 24.106 24.107 dbt_tas_multiply 807 12.1 0.003 0.003 24.031 24.034 dbt_tas_dbm 807 14.1 0.006 0.006 16.599 16.600 dbm_multiply 807 16.1 12.997 14.041 12.997 14.041 compute_mat_P_omega_calc_M_occ 250 9.0 0.662 0.693 11.864 11.864 cp_gemm 105 8.4 0.000 0.000 9.498 9.511 cp_gemm_cosma 105 9.4 9.498 9.510 9.498 9.510 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.095 9.096 dbt_copy 1111 10.7 3.741 3.976 8.204 8.475 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 7.637 7.638 mp_sync 8706 11.6 6.459 7.606 6.459 7.606 dbt_tas_mm_2 251 15.0 0.003 0.003 7.205 7.206 dbt_tas_mm_1N 524 15.1 0.003 0.003 6.329 6.993 GW_matrix_operations 10 7.0 0.001 0.002 6.135 6.140 compute_QP_energies 1 7.0 0.000 0.000 4.134 4.134 compute_self_energy_cubic_gw 1 8.0 0.005 0.005 4.132 4.134 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.326 3.326 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 3.195 3.196 dbt_communicate_buffer 1098 11.7 0.092 0.100 3.036 3.152 mp_waitall_2 3776 14.7 2.916 3.148 2.916 3.148 contract_cubic_gw 21 9.0 0.000 0.000 3.138 3.138 dbt_reserve_blocks_index 2849 12.4 0.104 0.111 2.230 2.472 dbt_reserve_blocks_index_array 2791 11.4 0.012 0.013 2.229 2.471 dbt_tas_reserve_blocks_index 3300 13.8 0.270 0.287 2.184 2.421 dbt_tas_replicate 396 14.1 1.219 1.531 2.308 2.407 dbm_reserve_blocks 3696 14.8 1.990 2.215 1.990 2.215 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 1.895 1.897 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 1.569 1.574 convert_to_new_pgrid 2421 14.1 0.030 0.032 1.328 1.490 scf_env_do_scf 1 3.0 0.000 0.000 1.477 1.477 scf_env_do_scf_inner_loop 17 4.0 0.001 0.001 1.476 1.477 dbm_copy 1608 15.1 1.290 1.453 1.290 1.453 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.002 1.435 1.440 mp_max_i 1992 9.8 1.113 1.401 1.113 1.401 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=104.50200000000001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=185.541, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_contract", label="dbt_contract", y=50.77, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=40.217, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_copy", label="dbt_copy", y=22.497, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=17.703, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=16.415000000000006, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=9.498, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_contract", label="dbt_contract", y=1.86, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=12.997, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_copy", label="dbt_copy", y=3.741, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.916, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=6.459, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.020 758.583 758.583 qs_forces 1 2.0 0.000 0.000 757.689 757.689 rebuild_ks_matrix 7 6.6 0.000 0.000 747.490 747.490 qs_ks_build_kohn_sham_matrix 7 7.6 0.002 0.002 747.490 747.490 hfx_ks_matrix 7 8.6 0.000 0.000 744.722 744.722 dbt_total 4858 11.6 0.057 0.057 638.740 638.740 hfx_ri_update_ks 7 9.6 0.000 0.000 519.184 519.184 hfx_ri_update_ks_Pmat 7 10.6 68.830 68.830 519.179 519.179 qs_energies 1 3.0 0.000 0.000 465.974 465.974 scf_env_do_scf 1 4.0 0.000 0.000 465.509 465.509 qs_ks_update_qs_env 8 6.0 0.000 0.000 455.856 455.856 dbt_contract 1473 13.0 188.869 188.869 423.678 423.678 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 291.640 291.640 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 269.593 269.593 dbt_tas_total 2391 14.1 2.391 2.391 253.264 253.264 hfx_ri_update_forces 1 7.0 0.000 0.000 225.533 225.533 dbt_tas_multiply 1482 14.0 0.007 0.007 219.966 219.966 init_scf_loop 2 5.0 0.000 0.000 195.912 195.912 hfx_ri_update_ks_Pmat_KS 567 11.6 0.007 0.007 183.064 183.064 dbt_copy 2330 12.4 93.097 93.097 181.681 181.681 dbt_tas_dbm 1482 16.0 0.010 0.010 176.574 176.574 dbm_multiply 1482 18.0 176.547 176.547 176.547 176.547 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.004 0.004 147.383 147.383 hfx_ri_forces_Pmat_3c 1 8.0 0.003 0.003 133.607 133.607 dbt_tas_mm_2 649 17.1 0.007 0.007 119.403 119.403 precalc_derivatives 1 8.0 0.012 0.012 72.063 72.063 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 54.104 54.104 dbt_tas_mm_3T 659 17.1 0.004 0.004 45.325 45.325 dbt_tas_reserve_blocks_index 7234 15.4 19.347 19.347 36.513 36.513 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.003 0.003 35.746 35.746 dbt_tas_copy 1474 13.1 27.481 27.481 35.135 35.135 dbt_tas_reshape 906 14.4 0.026 0.026 34.588 34.588 dbt_reserve_blocks_index 4836 14.6 3.948 3.948 31.107 31.107 dbt_reserve_blocks_index_array 4801 13.6 0.036 0.036 30.848 30.848 build_3c_derivatives 9 9.0 3.698 3.698 29.474 29.474 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.001 0.001 21.822 21.822 dbt_tas_reshape_buffer_obtain 906 15.4 13.797 13.797 19.887 19.887 dbt_split_copyback 87 12.2 14.254 14.254 16.029 16.029 dbt_tas_replicate 906 15.6 10.522 10.522 15.679 15.679 dbt_split_blocks_generic 174 12.2 12.232 12.232 15.289 15.289 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.011 100.454 100.455 qs_forces 1 2.0 0.000 0.000 100.180 100.180 rebuild_ks_matrix 7 6.6 0.000 0.000 99.164 99.165 qs_ks_build_kohn_sham_matrix 7 7.6 0.003 0.005 99.164 99.165 hfx_ks_matrix 7 8.6 0.000 0.001 97.783 97.801 dbt_total 4858 11.6 0.041 0.045 88.233 88.235 dbt_contract 1473 13.0 6.247 7.555 66.606 66.619 dbt_tas_total 2391 14.1 0.317 0.481 63.462 63.463 dbt_tas_multiply 1482 14.0 0.007 0.008 54.582 54.586 hfx_ri_update_ks 7 9.6 0.000 0.000 53.500 53.500 hfx_ri_update_ks_Pmat 7 10.6 2.721 3.965 53.498 53.498 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 51.093 51.093 qs_energies 1 3.0 0.000 0.000 49.070 49.070 scf_env_do_scf 1 4.0 0.000 0.000 48.865 48.865 qs_ks_update_qs_env 8 6.0 0.000 0.000 48.072 48.073 hfx_ri_update_forces 1 7.0 0.001 0.001 44.281 44.300 dbt_tas_dbm 1482 16.0 0.009 0.010 38.134 38.136 hfx_ri_forces_Pmat_3c 1 8.0 0.003 0.004 35.931 35.938 dbm_multiply 1482 18.0 24.914 30.339 24.914 30.339 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 28.578 28.578 mp_sync 17507 13.6 21.811 26.165 21.811 26.165 hfx_ri_update_ks_Pmat_KS 567 11.6 0.006 0.006 24.904 24.905 dbt_tas_mm_2 649 17.1 0.007 0.007 21.061 21.062 init_scf_loop 2 5.0 0.000 0.000 20.287 20.287 hfx_ri_update_ks_Pmat_Px3C 567 11.6 0.003 0.003 12.972 12.972 dbt_copy 2352 12.4 3.595 3.814 9.950 10.807 dbt_tas_merge 649 14.1 3.320 4.633 5.809 7.380 dbt_tas_mm_3T 659 17.1 0.003 0.004 6.528 7.183 precalc_derivatives 1 8.0 0.003 0.003 6.787 6.787 dbt_tas_mm_3N 163 16.5 0.001 0.001 5.400 5.488 mp_waitall_2 6131 16.2 4.961 5.475 4.961 5.475 hfx_ri_pre_scf_Pmat 1 12.0 0.001 0.001 5.457 5.457 dbt_tas_reserve_blocks_index 7352 15.5 0.658 0.793 3.958 4.800 dbt_tas_communicate_buffer 1908 16.3 0.106 0.112 3.982 4.435 dbt_tas_reshape 999 14.5 0.019 0.020 4.060 4.428 dbt_tas_replicate 909 15.6 1.350 1.412 4.325 4.399 dbm_reserve_blocks 8261 16.3 3.645 4.352 3.645 4.352 mp_max_i 3372 12.5 3.202 4.075 3.202 4.075 dbt_reserve_blocks_index 5240 14.5 0.186 0.216 3.323 3.955 dbt_reserve_blocks_index_array 5205 13.5 0.019 0.021 3.314 3.947 mp_alltoall_i 3170 15.2 3.360 3.792 3.360 3.792 mp_sum_l 38201 15.3 2.864 3.523 2.864 3.523 build_3c_derivatives 9 9.0 0.621 0.676 3.365 3.372 convert_to_new_pgrid 4446 16.0 0.065 0.120 2.714 3.278 hfx_ri_pre_scf_Pmat_RIx3C 81 13.0 0.000 0.000 3.116 3.123 dbm_copy 3041 16.9 2.509 3.071 2.509 3.071 dbm_add 1482 16.0 1.931 2.774 1.931 2.774 dbt_tas_reshape_alltoall 999 15.5 0.002 0.003 1.994 2.386 dbt_tas_merge_communicate_buff 649 15.1 0.010 0.011 1.933 2.320 hfx_ri_update_ks_Pmat_copy_2 567 11.6 0.003 0.003 2.242 2.243 dbt_communicate_buffer 1260 13.5 0.066 0.072 1.911 2.046 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=203.75900000000001, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_contract", label="dbt_contract", y=188.869, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=176.547, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_copy", label="dbt_copy", y=93.097, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=68.83, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_tas_copy", label="dbt_tas_copy", y=27.481, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=32.55999999999999, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_contract", label="dbt_contract", y=6.247, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=24.914, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_copy", label="dbt_copy", y=3.595, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=2.721, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_tas_copy", label="dbt_tas_copy", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=21.811, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.645, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=4.961, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.019 0.019 283.203 283.203 qs_energies 1 2.0 0.000 0.000 282.904 282.904 mp2_main 1 3.0 0.000 0.000 268.140 268.140 mp2_gpw_main 1 4.0 0.002 0.002 266.860 266.860 mp2_ri_gpw_compute_in 1 5.0 0.934 0.934 191.226 191.226 mp2_ri_gpw_compute_in_loop 1 6.0 0.036 0.036 158.629 158.629 mp2_eri_3c_integrate_gpw 2656 7.0 0.036 0.036 113.804 113.804 integrate_v_rspace 2666 8.0 3.303 3.303 92.965 92.965 grid_integrate_task_list 2666 9.0 86.184 86.184 86.184 86.184 mp2_ri_gpw_compute_en 1 5.0 0.060 0.060 75.576 75.576 mp2_ri_gpw_compute_en_RI_loop 1 6.0 38.196 38.196 71.494 71.494 calculate_wavefunction 5312 9.0 28.097 28.097 37.831 37.831 get_2c_integrals 1 6.0 0.000 0.000 31.598 31.598 compute_2c_integrals 1 7.0 0.013 0.013 29.434 29.434 compute_2c_integrals_loop_lm 1 8.0 0.026 0.026 29.393 29.393 mp2_eri_2c_integrate_gpw 1 9.0 5.211 5.211 29.367 29.367 dbcsr_multiply_generic 5322 8.0 0.356 0.356 28.506 28.506 ao_to_mo_and_store_B_mult_1 2656 7.0 0.028 0.028 28.470 28.470 mp2_ri_gpw_compute_en_expansio 2080 7.0 4.365 4.365 21.663 21.663 offload_gemm 2080 8.0 17.297 17.297 17.297 17.297 pw_transfer 63872 10.6 2.140 2.140 17.229 17.229 multiply_cannon 5322 9.0 3.252 3.252 16.192 16.192 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 10.654 10.654 16.137 16.137 fft_wrap_pw1pw2 53228 11.4 0.235 0.235 14.764 14.764 scf_env_do_scf 1 3.0 0.000 0.000 13.985 13.985 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 13.984 13.984 qs_scf_new_mos 10 5.0 0.000 0.000 11.829 11.829 mp2_ri_gpw_compute_en_ener 2080 7.0 11.621 11.621 11.621 11.621 multiply_cannon_loop 5322 10.0 0.171 0.171 11.406 11.406 multiply_cannon_multrec 5322 11.0 9.885 9.885 9.950 9.950 fft3d_s 53229 13.4 9.734 9.734 9.813 9.813 fft_wrap_pw1pw2_20 21271 12.4 0.588 0.588 9.802 9.802 make_m2s 10644 9.0 0.105 0.105 8.809 8.809 make_images 10644 10.0 3.063 3.063 8.211 8.211 eigensolver 11 5.8 0.002 0.002 8.042 8.042 cp_fm_diag_elpa 11 6.8 0.000 0.000 7.310 7.310 cp_fm_diag_elpa_base 11 7.8 7.068 7.068 7.310 7.310 potential_pw2rs 5322 10.0 0.278 0.278 6.608 6.608 copy_dbcsr_to_fm 2679 8.0 0.056 0.056 6.593 6.593 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.010 61.143 61.144 qs_energies 1 2.0 0.000 0.001 61.034 61.034 mp2_main 1 3.0 0.000 0.000 57.799 57.800 mp2_gpw_main 1 4.0 0.001 0.002 57.630 57.630 mp2_ri_gpw_compute_in 1 5.0 0.056 0.058 34.199 34.985 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 31.519 32.309 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.002 28.222 29.031 integrate_v_rspace 93 8.1 0.196 0.210 27.893 28.687 grid_integrate_task_list 93 9.1 27.334 28.145 27.334 28.145 mp2_ri_gpw_compute_en 1 5.0 0.320 0.330 23.331 23.649 mp2_ri_gpw_compute_en_RI_loop 1 6.0 2.197 2.381 19.679 20.221 mp2_ri_gpw_compute_en_expansio 65 7.0 0.214 0.245 9.282 9.956 offload_gemm 65 8.0 9.069 9.720 9.069 9.720 mp2_ri_gpw_compute_en_comm 65 7.0 4.207 4.620 7.444 7.945 mp_sendrecv_dm3 390 8.0 3.237 3.879 3.237 3.879 scf_env_do_scf 1 3.0 0.000 0.000 3.043 3.044 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 3.042 3.044 dbcsr_multiply_generic 176 8.0 0.013 0.013 2.715 2.828 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.683 2.790 get_2c_integrals 1 6.0 0.000 0.000 2.514 2.536 compute_2c_integrals 1 7.0 0.005 0.006 2.188 2.204 compute_2c_integrals_loop_lm 1 8.0 0.002 0.004 1.965 2.086 mp2_eri_2c_integrate_gpw 1 9.0 0.431 0.469 1.964 2.084 mp2_ri_create_group 1 6.0 0.000 0.000 2.056 2.081 replicate_iaK_2intgroup 1 7.0 1.500 1.661 1.903 2.081 calculate_wavefunction 166 9.0 0.884 0.935 1.639 1.719 pw_transfer 2120 10.5 0.080 0.086 1.525 1.572 make_m2s 352 9.0 0.004 0.005 1.417 1.499 make_images 352 10.0 0.067 0.068 1.403 1.485 qs_scf_new_mos 10 5.0 0.000 0.001 1.460 1.480 eigensolver 11 5.8 0.001 0.001 1.468 1.468 fft_wrap_pw1pw2 1768 11.4 0.009 0.010 1.414 1.456 multiply_cannon 176 9.0 0.022 0.024 1.230 1.268 cp_fm_diag_elpa 11 6.8 0.000 0.000 1.243 1.245 cp_fm_redistribute_end 11 7.8 0.469 1.233 0.479 1.234 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=101.80799999999996, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=86.184, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=38.196, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=28.097, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="offload_gemm", label="offload_gemm", y=17.297, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_ener", label="mp2_ri_gpw_compute_en_ener", y=11.621, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_comm", label="mp2_ri_gpw_compute_en_comm", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=14.214999999999996, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.334, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=2.197, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.884, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="offload_gemm", label="offload_gemm", y=9.069, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_ener", label="mp2_ri_gpw_compute_en_ener", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_comm", label="mp2_ri_gpw_compute_en_comm", y=4.207, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=3.237, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.113 0.113 205.983 205.983 qs_energies 1 2.0 0.000 0.000 203.970 203.970 scf_env_do_scf 1 3.0 0.000 0.000 193.208 193.208 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 193.208 193.208 qs_scf_new_mos 15 5.0 0.001 0.001 87.464 87.464 qs_ks_update_qs_env 15 5.0 0.000 0.000 73.087 73.087 rebuild_ks_matrix 15 6.0 0.000 0.000 72.672 72.672 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 72.672 72.672 eigensolver 15 6.0 0.002 0.002 71.720 71.720 cp_fm_diag_elpa 15 7.0 0.000 0.000 55.423 55.423 cp_fm_diag_elpa_base 15 8.0 49.885 49.885 55.423 55.423 qs_vxc_create 15 8.0 0.038 0.038 48.028 48.028 calculate_dispersion_nonloc 15 9.0 9.720 9.720 42.587 42.587 pw_transfer 1191 10.0 0.104 0.104 28.926 28.926 fft_wrap_pw1pw2 1086 11.0 0.014 0.014 28.631 28.631 qs_rho_update_rho 16 5.0 0.000 0.000 25.058 25.058 calculate_rho_elec 16 6.0 0.343 0.343 25.058 25.058 grid_collocate_task_list 16 7.0 23.425 23.425 23.425 23.425 sum_up_and_integrate 15 8.0 0.087 0.087 22.865 22.865 integrate_v_rspace 15 9.0 0.032 0.032 22.778 22.778 grid_integrate_task_list 15 10.0 22.113 22.113 22.113 22.113 fft_wrap_pw1pw2_150 765 12.0 3.557 3.557 21.661 21.661 copy_dbcsr_to_fm 16 5.9 0.001 0.001 12.616 12.616 pw_scatter_s 585 13.1 11.907 11.907 11.907 11.907 fft3d_s 1087 13.0 11.452 11.452 11.464 11.464 dbcsr_complete_redistribute 46 8.3 4.026 4.026 11.203 11.203 cp_fm_cholesky_restore 45 7.0 11.014 11.014 11.014 11.014 cp_fm_upper_to_full 30 8.0 10.819 10.819 10.819 10.819 gspace_mixing 14 5.0 0.279 0.279 8.871 8.871 vdW_energy 15 10.0 8.642 8.642 8.642 8.642 broyden_mixing 14 6.0 8.063 8.063 8.064 8.064 fft_wrap_pw1pw2_200 197 12.3 0.337 0.337 6.703 6.703 xc_vxc_pw_create 15 9.0 0.335 0.335 5.404 5.404 init_scf_run 1 3.0 0.000 0.000 4.997 4.997 dbcsr_finalize 159 9.9 0.024 0.024 4.874 4.874 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.833 4.833 dbcsr_merge_all 91 11.1 0.090 0.090 4.704 4.704 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.016 82.173 82.174 qs_energies 1 2.0 0.000 0.001 81.792 81.793 scf_env_do_scf 1 3.0 0.000 0.000 76.723 76.724 scf_env_do_scf_inner_loop 15 4.0 0.001 0.002 76.723 76.724 qs_ks_update_qs_env 15 5.0 0.000 0.000 37.059 37.069 rebuild_ks_matrix 15 6.0 0.000 0.000 37.021 37.031 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.004 37.021 37.031 qs_rho_update_rho 16 5.0 0.000 0.000 23.732 23.734 calculate_rho_elec 16 6.0 0.011 0.012 23.732 23.734 sum_up_and_integrate 15 8.0 0.009 0.011 23.053 23.080 integrate_v_rspace 15 9.0 0.001 0.001 23.044 23.070 grid_collocate_task_list 16 7.0 21.834 22.843 21.834 22.843 grid_integrate_task_list 15 10.0 21.626 22.219 21.626 22.219 qs_scf_new_mos 15 5.0 0.001 0.001 16.602 16.635 eigensolver 15 6.0 0.002 0.002 15.433 15.438 qs_vxc_create 15 8.0 0.001 0.001 13.565 13.574 cp_fm_diag_elpa 15 7.0 0.000 0.000 11.344 11.352 cp_fm_diag_elpa_base 15 8.0 11.101 11.149 11.338 11.341 calculate_dispersion_nonloc 15 9.0 1.333 1.382 11.086 11.101 pw_transfer 1191 10.0 0.115 0.129 9.808 9.891 fft_wrap_pw1pw2 1086 11.0 0.017 0.019 9.578 9.667 fft3d_ps 1086 13.0 4.263 4.395 7.315 7.474 fft_wrap_pw1pw2_150 765 12.0 0.505 0.543 6.473 6.510 cp_fm_cholesky_restore 45 7.0 3.868 3.919 3.868 3.919 qs_energies_init_hamiltonians 1 3.0 0.000 0.001 3.153 3.154 fft_wrap_pw1pw2_200 197 12.3 0.280 0.299 2.975 3.045 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.747 2.976 xc_vxc_pw_create 15 9.0 0.026 0.032 2.478 2.490 mp_alltoall_z22v 1086 15.0 1.914 2.319 1.914 2.319 rs_pw_transfer 158 9.4 0.002 0.003 1.825 2.258 vdW_energy 15 10.0 1.985 2.083 1.985 2.083 density_rs2pw 16 7.0 0.001 0.002 1.769 2.057 build_core_ppnl 1 5.0 1.835 2.009 1.835 2.009 mp_waitany 520 11.3 1.221 1.829 1.221 1.829 rs_pw_transfer_RS2PW_200 18 8.8 0.072 0.076 0.980 1.727 x_to_yz 585 14.1 0.716 0.784 1.659 1.725 xc_pw_derive 90 11.0 0.002 0.002 1.626 1.697 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=76.18700000000001, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=49.885, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.425, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.113, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=11.907, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=11.452, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.014, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=19.480999999999995, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=11.101, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.834, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=21.626, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.868, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=4.263, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.094 0.094 352.408 352.408 qs_energies 1 2.0 0.000 0.000 352.212 352.212 ls_scf 1 3.0 0.000 0.000 350.233 350.233 ls_scf_main 1 4.0 0.002 0.002 334.911 334.911 density_matrix_trs4 11 5.0 0.012 0.012 188.238 188.238 ls_scf_dm_to_ks 11 5.0 0.000 0.000 138.809 138.809 matrix_ls_to_qs 11 6.0 0.000 0.000 134.115 134.115 dbcsr_multiply_generic 185 6.1 0.654 0.654 118.599 118.599 multiply_cannon 185 7.1 3.357 3.357 81.022 81.022 dbcsr_copy_into_existing 11 7.0 75.292 75.292 75.292 75.292 dbcsr_complete_redistribute 23 7.5 46.382 46.382 64.491 64.491 matrix_decluster 11 7.0 0.000 0.000 58.821 58.821 multiply_cannon_loop 185 8.1 0.452 0.452 57.883 57.883 multiply_cannon_multrec 185 9.1 55.791 55.791 55.858 55.858 arnoldi_extremal 12 6.1 0.000 0.000 48.241 48.241 arnoldi_normal_ev 12 7.1 0.028 0.028 48.241 48.241 build_subspace 23 8.1 0.140 0.140 47.558 47.558 dbcsr_matrix_vector_mult 652 9.0 0.266 0.266 36.632 36.632 dbcsr_matrix_vector_mult_local 652 10.0 34.915 34.915 34.925 34.925 make_m2s 370 7.1 0.033 0.033 30.847 30.847 make_images 370 8.1 7.317 7.317 28.320 28.320 dbcsr_finalize 646 7.5 0.219 0.219 24.199 24.199 dbcsr_merge_all 597 8.5 4.089 4.089 21.775 21.775 setup_rec_index_2d 370 8.1 19.567 19.567 19.567 19.567 dbcsr_sort_indices 1103 9.9 18.119 18.119 18.119 18.119 tree_to_linear_d 110 9.4 15.574 15.574 15.574 15.574 quick_finalize 395 10.0 0.569 0.569 15.532 15.532 dbcsr_special_finalize 370 9.1 0.003 0.003 14.311 14.311 ls_scf_init_scf 1 4.0 0.000 0.000 14.098 14.098 ls_scf_init_matrix_S 1 5.0 0.000 0.000 13.627 13.627 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 12.683 12.683 dbcsr_dot_sd 144 6.3 10.305 10.305 10.306 10.306 dbcsr_frobenius_norm 142 6.1 8.629 8.629 8.632 8.632 matrix_qs_to_ls 12 5.1 0.000 0.000 8.210 8.210 matrix_cluster 12 6.1 0.000 0.000 8.210 8.210 dbcsr_new_transposed 2 7.0 0.149 0.149 7.189 7.189 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.010 85.064 85.065 qs_energies 1 2.0 0.000 0.000 84.967 84.967 ls_scf 1 3.0 0.000 0.000 84.880 84.880 ls_scf_main 1 4.0 0.000 0.002 81.500 81.500 density_matrix_trs4 11 5.0 0.008 0.012 77.823 77.881 dbcsr_multiply_generic 185 6.1 0.077 0.096 72.397 72.663 multiply_cannon 185 7.1 0.041 0.043 60.564 61.307 multiply_cannon_loop 185 8.1 0.193 0.201 57.351 58.598 multiply_cannon_multrec 1480 9.1 38.606 41.524 39.065 41.993 mp_waitall_1 11936 10.3 16.613 18.581 16.613 18.581 multiply_cannon_metrocomm3 1480 9.1 0.018 0.020 9.813 12.616 make_m2s 370 7.1 0.033 0.037 8.151 8.245 make_images 370 8.1 0.710 0.750 8.034 8.130 multiply_cannon_metrocomm1 1480 9.1 0.011 0.013 3.827 5.904 calculate_norms 2960 9.1 4.377 4.512 4.377 4.512 arnoldi_extremal 12 6.1 0.000 0.000 4.168 4.178 arnoldi_normal_ev 12 7.1 0.002 0.008 4.168 4.177 build_subspace 23 8.1 0.037 0.051 4.039 4.042 make_images_data 370 9.1 0.011 0.012 3.276 3.586 dbcsr_matrix_vector_mult 652 9.0 0.019 0.081 3.259 3.420 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.172 3.260 dbcsr_complete_redistribute 23 7.5 1.923 1.995 2.851 2.940 matrix_ls_to_qs 11 6.0 0.000 0.000 2.819 2.912 mp_sum_l 1119 5.6 2.284 2.880 2.284 2.880 hybrid_alltoall_any 393 9.9 0.234 1.223 2.677 2.849 dbcsr_matrix_vector_mult_local 652 10.0 2.547 2.718 2.552 2.722 matrix_decluster 11 7.0 0.000 0.000 2.552 2.646 ls_scf_init_scf 1 4.0 0.000 0.000 2.625 2.626 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.587 2.596 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.369 2.371 make_images_pack 370 9.1 2.066 2.302 2.070 2.307 dbcsr_finalize 646 7.5 0.014 0.014 1.867 1.997 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 1.400 1.906 dbcsr_add_d 280 6.0 0.001 0.002 1.786 1.842 dbcsr_add_anytype 280 7.0 0.951 1.007 1.785 1.841 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=120.46100000000001, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=75.292, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=55.791, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=46.382, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=34.915, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=19.567, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=18.713999999999984, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=38.606, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.923, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.547, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=4.377, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=16.613, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.284, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.006 109.379 109.379 lib_test 1 2.0 0.000 0.000 109.372 109.372 dbcsr_run_tests 3 3.0 0.003 0.003 109.372 109.372 test_multiplies_multiproc 3 4.0 0.001 0.001 88.100 88.100 dbcsr_redistribute 9 5.0 59.345 59.345 63.272 63.272 dbcsr_multiply_generic 9 5.0 0.002 0.002 22.931 22.931 dbcsr_make_random_matrix 9 4.0 15.269 15.269 21.175 21.175 multiply_cannon 9 6.0 0.002 0.002 15.661 15.661 multiply_cannon_loop 9 7.0 0.005 0.005 15.107 15.107 multiply_cannon_multrec 9 8.0 15.102 15.102 15.103 15.103 dbcsr_finalize 27 5.7 0.004 0.004 10.138 10.138 dbcsr_merge_all 18 6.5 3.607 3.607 9.331 9.331 tree_to_linear_d 9 7.0 3.585 3.585 3.585 3.585 mp_alltoall_d11v 27 6.0 3.580 3.580 3.580 3.580 dbcsr_data_release 975 7.6 2.630 2.630 2.630 2.630 make_m2s 18 6.0 0.001 0.001 2.373 2.373 make_images 18 7.0 0.771 0.771 2.283 2.283 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 23.309 23.309 lib_test 1 2.0 0.000 0.000 23.274 23.298 dbcsr_run_tests 3 3.0 0.001 0.001 23.273 23.296 test_multiplies_multiproc 3 4.0 0.001 0.002 22.128 22.188 dbcsr_multiply_generic 9 5.0 0.001 0.002 20.172 20.281 multiply_cannon 9 6.0 0.002 0.002 17.993 18.389 multiply_cannon_loop 9 7.0 0.003 0.004 17.592 17.944 multiply_cannon_multrec 72 8.0 14.764 15.364 14.766 15.365 mp_waitall_1 576 9.2 3.196 3.766 3.196 3.766 multiply_cannon_metrocomm1 72 8.0 0.001 0.002 2.529 3.233 dbcsr_make_random_matrix 9 4.0 0.894 0.955 1.100 1.148 dbcsr_finalize 27 5.7 0.000 0.001 0.892 0.996 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.286 0.958 mp_sum_l 390 2.5 0.473 0.936 0.473 0.936 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.467 0.930 make_m2s 18 6.0 0.001 0.001 0.844 0.915 make_images 18 7.0 0.026 0.027 0.840 0.912 dbcsr_data_release 444 7.6 0.731 0.861 0.731 0.861 dbcsr_merge_all 18 6.5 0.139 0.155 0.770 0.843 dbcsr_destroy 111 5.9 0.006 0.059 0.617 0.715 dbcsr_redistribute 9 5.0 0.367 0.420 0.620 0.655 make_images_data 18 8.0 0.001 0.001 0.421 0.518 dbcsr_data_copy_aa2 18 7.5 0.464 0.511 0.464 0.511 dbcsr_checksum 6 5.0 0.405 0.493 0.501 0.501 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.841000000000008, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=59.345, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=15.269, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=15.102, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.607, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="tree_to_linear_d", label="tree_to_linear_d", y=3.585, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.63, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.745000000000001, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.367, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.894, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=14.764, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.139, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="tree_to_linear_d", label="tree_to_linear_d", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.731, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.473, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.196, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.063 0.063 155.170 155.170 qs_mol_dyn_low 1 2.0 0.005 0.005 152.993 152.993 velocity_verlet 5 3.0 0.005 0.005 123.880 123.880 qmmm_el_coupling 6 3.8 0.000 0.000 73.643 73.643 qmmm_elec_with_gaussian 6 4.8 0.165 0.165 73.636 73.636 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 72.651 72.651 qmmm_elec_gaussian_low_G 6 6.8 71.130 71.130 71.130 71.130 qs_forces 6 3.8 0.001 0.001 56.962 56.962 qs_energies 6 4.8 0.001 0.001 50.750 50.750 scf_env_do_scf 6 5.8 0.001 0.001 46.535 46.535 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 38.798 38.798 rebuild_ks_matrix 45 8.4 0.000 0.000 38.367 38.367 qs_ks_build_kohn_sham_matrix 45 9.4 0.009 0.009 38.367 38.367 qs_ks_update_qs_env 45 7.8 0.000 0.000 32.926 32.926 pw_transfer 966 12.3 0.076 0.076 23.943 23.943 fft_wrap_pw1pw2 801 13.6 0.010 0.010 23.603 23.603 fft_wrap_pw1pw2_150 507 15.2 2.330 2.330 23.058 23.058 qs_vxc_create 45 10.4 0.001 0.001 19.444 19.444 xc_vxc_pw_create 45 11.4 1.009 1.009 19.443 19.443 fist_calc_energy_force 6 3.8 0.002 0.002 13.238 13.238 xc_pw_derive 270 13.4 0.003 0.003 12.968 12.968 force_nonbond 6 4.8 11.824 11.824 11.824 11.824 xc_rho_set_and_dset_create 45 12.4 1.334 1.334 11.065 11.065 pw_scatter_s 429 15.8 10.906 10.906 10.906 10.906 qs_rho_update_rho 45 7.9 0.000 0.000 10.395 10.395 calculate_rho_elec 45 8.9 0.905 0.905 10.395 10.395 fft3d_s 802 15.6 8.901 8.901 8.912 8.912 qmmm_forces 6 3.8 0.002 0.002 8.296 8.296 pw_integral_ab 2539 7.4 7.974 7.974 7.974 7.974 qmmm_forces_with_gaussian 6 4.8 0.163 0.163 7.773 7.773 init_scf_loop 6 6.8 0.000 0.000 7.731 7.731 xc_pw_divergence 45 12.4 0.002 0.002 7.276 7.276 qs_ks_ddapc 45 10.4 0.001 0.001 7.024 7.024 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.639 6.639 pw_poisson_solve 51 9.9 2.472 2.472 5.617 5.617 qmmm_forces_gaussian_low_G 6 6.8 5.559 5.559 5.559 5.559 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.454 5.454 density_rs2pw 45 9.9 0.003 0.003 4.780 4.780 grid_collocate_task_list 45 9.9 4.710 4.710 4.710 4.710 sum_up_and_integrate 45 10.4 0.254 0.254 4.424 4.424 cp_ddapc_apply_CD 45 11.4 0.007 0.007 4.390 4.390 integrate_v_rspace 45 11.4 0.013 0.013 4.170 4.170 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.039 80.974 80.975 qs_mol_dyn_low 1 2.0 0.005 0.006 79.336 79.429 qs_forces 6 3.8 0.001 0.001 58.759 58.759 qs_energies 6 4.8 0.001 0.001 56.098 56.098 scf_env_do_scf 6 5.8 0.000 0.001 54.693 54.693 scf_env_do_scf_inner_loop 113 6.2 0.003 0.010 52.540 52.541 rebuild_ks_matrix 119 8.1 0.000 0.001 38.288 38.305 qs_ks_build_kohn_sham_matrix 119 9.1 0.022 0.024 38.287 38.304 qs_ks_update_qs_env 119 7.3 0.001 0.001 36.083 36.100 velocity_verlet 5 3.0 0.002 0.002 32.299 32.303 pw_transfer 2446 12.3 0.268 0.295 24.114 24.350 fft_wrap_pw1pw2 2059 13.4 0.032 0.034 23.410 23.663 fft_wrap_pw1pw2_150 1321 14.9 2.142 2.390 22.586 22.909 qs_vxc_create 119 10.1 0.004 0.005 19.483 19.493 xc_vxc_pw_create 119 11.1 0.220 0.285 19.479 19.488 fft3d_ps 2059 15.4 10.391 11.428 17.477 18.040 qs_rho_update_rho 119 7.3 0.001 0.001 15.171 15.172 calculate_rho_elec 119 8.3 0.088 0.097 15.170 15.171 xc_pw_derive 714 13.1 0.015 0.017 14.485 14.943 sum_up_and_integrate 119 10.1 0.072 0.083 13.923 13.962 integrate_v_rspace 119 11.1 0.005 0.005 13.851 13.893 qmmm_forces 6 3.8 0.002 0.003 11.316 11.316 qmmm_forces_with_gaussian 6 4.8 0.305 0.398 10.977 11.169 rs_pw_transfer 988 11.5 0.015 0.017 9.756 10.262 xc_pw_divergence 119 12.1 0.008 0.010 9.686 10.023 xc_rho_set_and_dset_create 119 12.1 0.572 0.697 9.208 9.581 density_rs2pw 119 9.3 0.010 0.011 8.819 9.203 qmmm_el_coupling 6 3.8 0.000 0.000 8.117 8.166 qmmm_elec_with_gaussian 6 4.8 0.293 0.403 8.114 8.163 potential_pw2rs 119 12.1 0.010 0.011 7.720 7.732 grid_collocate_task_list 119 9.3 6.048 6.455 6.048 6.455 mp_alltoall_z22v 2059 17.4 4.497 6.064 4.497 6.064 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.802 6.015 grid_integrate_task_list 119 12.1 5.680 5.965 5.680 5.965 qmmm_forces_gaussian_low_G 6 6.8 4.759 4.970 4.759 4.970 x_to_yz 1095 16.8 1.540 1.812 3.765 4.225 yz_to_x 964 16.0 1.000 1.235 3.273 4.177 rs_pw_transfer_PW2RS_150 125 13.9 1.980 2.073 3.926 3.955 rs_pw_transfer_RS2PW_150 125 11.2 1.609 1.745 3.426 3.938 qs_scf_new_mos 113 7.2 0.001 0.001 3.910 3.920 qs_scf_loop_do_ot 113 8.2 0.001 0.001 3.909 3.919 pw_restrict_s3 18 5.8 2.180 2.213 3.791 3.834 ot_scf_mini 113 9.2 0.002 0.002 3.738 3.749 mp_waitany 4028 12.8 3.043 3.699 3.043 3.699 dbcsr_multiply_generic 2588 12.3 0.102 0.118 3.498 3.581 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.368 3.526 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 3.115 3.162 pw_prolongate_s3 18 6.8 1.767 1.815 3.115 3.162 qs_ks_ddapc 119 10.1 0.003 0.004 2.576 2.713 qmmm_elec_gaussian_low_G 6 6.8 2.435 2.580 2.435 2.580 mp_sum_dm3 33 5.7 2.314 2.442 2.314 2.442 ot_mini 113 10.2 0.001 0.001 2.353 2.365 pw_integral_ab 2761 7.7 1.772 1.804 2.137 2.332 mp_waitall_1 188862 16.2 2.051 2.272 2.051 2.272 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.215 2.216 init_scf_loop 6 6.8 0.000 0.000 2.149 2.149 pw_gather_p 964 15.0 1.901 2.060 1.901 2.060 pw_scatter_p 1095 15.8 1.803 1.962 1.803 1.962 qs_ot_get_derivative 113 11.2 0.001 0.001 1.858 1.867 mp_sum_d 5820 12.2 1.145 1.627 1.145 1.627 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=34.166, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=71.13, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=11.824, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=10.906, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.901, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=7.974, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.559, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.71, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=45.392, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.435, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.772, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.759, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.048, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.68, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=10.391, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=4.497, yerr=0.0 Summary: Performance test took 65 minutes. Status: OK Removing intermediate container f2fafcae0a7a ---> c337fb773870 Step 41/41 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 829af92b2570 Removing intermediate container 829af92b2570 ---> 3b6c71a0c6b5 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 3b6c71a0c6b5 Successfully tagged gcr.io/cp2k-org-project/img_cp2k-perf-openmp-arch-b51:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2022-03-21 20:40:12+00:00