StartDate: 2023-07-19 14:53:17+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: 65cca6c7165d0c817769edb47a3ccdcda48d696c CommitTime: 2023-07-19 14:34:59 +0200 CommitAuthor: Ole Schütt CommitSubject: tddfpt: Add default initializers #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=65cca6c7165d0c817769edb47a3ccdcda48d696c Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 383.3MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 3153aa388d02: Pulling fs layer 3153aa388d02: Verifying Checksum 3153aa388d02: Download complete 3153aa388d02: Pull complete Digest: sha256:0bced47fffa3361afa981854fcabcd4577cd43cebbb808cea2b1f33a3dd7f508 Status: Downloaded newer image for ubuntu:22.04 ---> 5a81c4b8502e Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 7aa07568c8cc Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 17e4a8d90f26 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 1e89266971ea Step 5/42 : RUN mkdir scripts ---> Using cache ---> 2d1625000b51 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> c29ce2c49e10 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> 06abfb02151d Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> 71c06419ade2 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 3b86893d3eaf Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> fa2bc79a1e11 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> ac99b22dec4a Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 06a8e152e1b3 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> 89fc192bff3f Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 82b714d305b1 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 2bd05b9ec84c Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> de5407ccb74f Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 30d68b836e0d Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 5ebeafe463ec Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 5523dbc2a3b4 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 2a06eb904563 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> df288395032a Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> 55c9169692cf Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 32123347419c Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> d729d8a72ea9 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> d3870dc56b35 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> ea8ec7f16e82 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> c7b4b1a90391 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 6a0427b19587 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> a4faf0fa9ca5 Step 30/42 : COPY ./Makefile . ---> Using cache ---> 8e69ed2029cf Step 31/42 : COPY ./src ./src ---> Using cache ---> cdad210418e1 Step 32/42 : COPY ./exts ./exts ---> Using cache ---> bf1a793dbfe3 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> 2e1a456cb737 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && rm -rf lib obj && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in 269409baf53c './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container 269409baf53c ---> 3e8b56d52aef Step 35/42 : COPY ./data ./data ---> 2931d9916074 Step 36/42 : COPY ./tests ./tests ---> dd3b66229b9f Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> f8bd2894ffc7 Step 38/42 : COPY ./benchmarks ./benchmarks ---> 19c2d0712106 Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 491bdb269a91 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in 4ab3adb5be1c ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 70 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.059 0.059 108.619 108.619 qs_mol_dyn_low 1 2.0 0.003 0.003 107.906 107.906 qs_forces 11 3.9 0.002 0.002 107.862 107.862 qs_energies 11 4.9 0.001 0.001 100.346 100.346 scf_env_do_scf 11 5.9 0.001 0.001 85.975 85.975 velocity_verlet 10 3.0 0.002 0.002 72.643 72.643 scf_env_do_scf_inner_loop 108 6.5 0.013 0.013 66.168 66.168 rebuild_ks_matrix 119 8.3 0.001 0.001 26.548 26.548 qs_ks_build_kohn_sham_matrix 119 9.3 0.016 0.016 26.547 26.547 qs_ks_update_qs_env 119 7.6 0.001 0.001 25.280 25.280 dbcsr_multiply_generic 2286 12.5 0.199 0.199 23.530 23.530 qs_scf_new_mos 108 7.5 0.001 0.001 23.258 23.258 qs_scf_loop_do_ot 108 8.5 0.001 0.001 23.257 23.257 qs_rho_update_rho_low 119 7.7 0.001 0.001 22.543 22.543 calculate_rho_elec 119 8.7 1.106 1.106 22.542 22.542 ot_scf_mini 108 9.5 0.003 0.003 21.506 21.506 init_scf_loop 11 6.9 0.000 0.000 19.610 19.610 grid_collocate_task_list 119 9.7 17.610 17.610 17.610 17.610 sum_up_and_integrate 119 10.3 1.323 1.323 16.444 16.444 prepare_preconditioner 11 7.9 0.000 0.000 16.166 16.166 make_preconditioner 11 8.9 0.000 0.000 16.166 16.166 integrate_v_rspace 119 11.3 0.128 0.128 15.120 15.120 make_full_inverse_cholesky 11 9.9 0.021 0.021 14.607 14.607 ot_mini 108 10.5 0.001 0.001 13.956 13.956 make_m2s 4572 13.5 0.052 0.052 12.203 12.203 grid_integrate_task_list 119 12.3 12.171 12.171 12.171 12.171 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 8.316 8.316 qs_ot_get_derivative 108 11.5 0.001 0.001 7.221 7.221 pw_transfer 1439 11.6 0.083 0.083 7.216 7.216 multiply_cannon 2286 13.5 0.240 0.240 7.033 7.033 fft_wrap_pw1pw2 1201 12.6 0.008 0.008 6.915 6.915 ot_diis_step 108 11.5 0.005 0.005 6.731 6.731 make_images 4572 14.5 2.726 2.726 6.501 6.501 multiply_cannon_loop 2286 14.5 0.054 0.054 6.454 6.454 multiply_cannon_multrec 2286 15.5 6.334 6.334 6.399 6.399 dbcsr_make_dense_low 5837 15.5 0.086 0.086 6.099 6.099 make_dense_data 5837 16.5 5.369 5.369 5.998 5.998 fft_wrap_pw1pw2_140 487 13.2 0.499 0.499 5.811 5.811 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.784 5.784 apply_single 119 13.6 0.001 0.001 5.783 5.783 cp_fm_cholesky_decompose 22 10.9 5.601 5.601 5.601 5.601 dbcsr_complete_redistribute 329 12.2 3.015 3.015 5.274 5.274 dbcsr_make_images_dense 3978 14.8 0.018 0.018 5.254 5.254 init_scf_run 11 5.9 0.002 0.002 5.182 5.182 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.181 5.181 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.820 4.820 wfi_extrapolate 11 7.9 0.001 0.001 4.613 4.613 qs_create_task_list 11 7.9 0.000 0.000 4.358 4.358 generate_qs_task_list 11 8.9 2.241 2.241 4.358 4.358 copy_dbcsr_to_fm 153 11.3 0.003 0.003 4.220 4.220 cp_fm_cholesky_invert 11 10.9 3.967 3.967 3.967 3.967 dbcsr_copy 2102 12.0 0.305 0.305 3.933 3.933 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.887 3.887 density_rs2pw 119 9.7 0.006 0.006 3.826 3.826 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.625 3.625 dbcsr_copy_into_existing 22 7.9 3.554 3.554 3.554 3.554 qs_ot_get_p 119 10.4 0.001 0.001 3.496 3.496 pw_poisson_solve 119 10.3 0.858 0.858 3.472 3.472 fft3d_s 1202 14.6 3.417 3.417 3.423 3.423 dbcsr_dot_sd 1205 11.9 3.368 3.368 3.371 3.371 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 3.142 3.142 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.957 2.957 potential_pw2rs 119 12.3 0.100 0.100 2.821 2.821 copy_fm_to_dbcsr 176 11.2 0.001 0.001 2.631 2.631 dbcsr_data_release 279532 16.0 2.515 2.515 2.515 2.515 pw_integral_ab 119 11.3 2.371 2.371 2.371 2.371 evaluate_core_matrix_traces 119 8.3 0.001 0.001 2.356 2.356 calculate_ptrace_kp 238 9.3 0.001 0.001 2.356 2.356 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 2.283 2.283 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 2.263 2.263 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.211 2.211 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.022 0.039 54.530 54.540 qs_mol_dyn_low 1 2.0 0.003 0.004 54.238 54.244 qs_forces 11 3.9 0.002 0.002 54.194 54.195 qs_energies 11 4.9 0.001 0.001 50.682 50.685 scf_env_do_scf 11 5.9 0.001 0.002 46.364 46.365 scf_env_do_scf_inner_loop 108 6.5 0.003 0.023 42.764 42.764 velocity_verlet 10 3.0 0.001 0.003 32.487 32.489 rebuild_ks_matrix 119 8.3 0.001 0.001 19.563 19.633 qs_ks_build_kohn_sham_matrix 119 9.3 0.019 0.021 19.563 19.632 qs_ks_update_qs_env 119 7.6 0.001 0.001 17.378 17.445 dbcsr_multiply_generic 2286 12.5 0.090 0.092 15.937 16.023 qs_rho_update_rho_low 119 7.7 0.001 0.001 15.850 15.856 calculate_rho_elec 119 8.7 0.033 0.035 15.849 15.855 sum_up_and_integrate 119 10.3 0.042 0.045 14.445 14.466 integrate_v_rspace 119 11.3 0.005 0.006 14.403 14.424 qs_scf_new_mos 108 7.5 0.001 0.001 12.836 12.866 qs_scf_loop_do_ot 108 8.5 0.001 0.001 12.836 12.865 ot_scf_mini 108 9.5 0.003 0.003 12.109 12.134 multiply_cannon 2286 13.5 0.162 0.176 11.568 11.969 multiply_cannon_loop 2286 14.5 0.112 0.120 10.854 11.047 grid_collocate_task_list 119 9.7 10.467 10.829 10.467 10.829 grid_integrate_task_list 119 12.3 9.417 9.709 9.417 9.709 mp_waitall_1 158411 16.6 8.871 9.132 8.871 9.132 multiply_cannon_metrocomm3 18288 15.5 0.052 0.056 6.768 7.026 ot_mini 108 10.5 0.001 0.001 6.965 6.993 rs_pw_transfer 974 11.9 0.016 0.017 5.145 5.622 pw_transfer 1439 11.6 0.080 0.086 5.052 5.131 density_rs2pw 119 9.7 0.007 0.007 4.601 5.064 fft_wrap_pw1pw2 1201 12.6 0.011 0.012 4.885 4.961 fft_wrap_pw1pw2_140 487 13.2 0.878 0.933 4.151 4.317 potential_pw2rs 119 12.3 0.008 0.008 4.227 4.239 multiply_cannon_multrec 18288 15.5 3.644 3.756 3.658 3.771 qs_ot_get_derivative 108 11.5 0.001 0.001 3.560 3.585 init_scf_loop 11 6.9 0.000 0.000 3.576 3.576 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.397 3.445 apply_single 119 13.6 0.000 0.001 3.397 3.444 ot_diis_step 108 11.5 0.004 0.005 3.382 3.382 fft3d_ps 1201 14.6 1.621 1.743 3.109 3.254 make_m2s 4572 13.5 0.053 0.062 3.022 3.095 init_scf_run 11 5.9 0.000 0.005 3.045 3.046 scf_env_initial_rho_setup 11 6.9 0.000 0.004 3.045 3.046 wfi_extrapolate 11 7.9 0.001 0.001 2.734 2.734 make_images 4572 14.5 0.129 0.135 2.642 2.718 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.363 2.370 mp_waitany 9880 13.7 1.742 2.238 1.742 2.238 qs_ot_get_p 119 10.4 0.001 0.001 1.745 1.794 rs_pw_transfer_RS2PW_140 130 11.5 0.242 0.259 1.221 1.698 rs_pw_transfer_PW2RS_140 130 13.9 0.490 0.523 1.564 1.607 make_images_data 4572 15.5 0.042 0.048 1.458 1.583 prepare_preconditioner 11 7.9 0.000 0.000 1.395 1.412 make_preconditioner 11 8.9 0.000 0.000 1.395 1.412 hybrid_alltoall_any 4725 16.4 0.075 0.218 1.292 1.412 mp_alltoall_d11v 2130 13.8 1.032 1.294 1.032 1.294 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.268 1.288 mp_alltoall_z22v 1201 16.6 1.153 1.282 1.153 1.282 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.213 1.231 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 1.139 1.141 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.001 1.033 1.124 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 1.085 1.104 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=61.534000000000006, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=17.61, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=12.171, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=6.334, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=5.601, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.369, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=20.389000000000003, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=10.467, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=9.417, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.644, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.871, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.742, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 131.301 131.301 qs_mol_dyn_low 1 2.0 0.003 0.003 130.561 130.561 qs_forces 11 3.9 0.001 0.001 130.519 130.519 qs_energies 11 4.9 0.001 0.001 121.649 121.649 scf_env_do_scf 11 5.9 0.001 0.001 107.009 107.009 scf_env_do_scf_inner_loop 96 6.5 0.012 0.012 86.131 86.131 velocity_verlet 10 3.0 0.002 0.002 83.420 83.420 rebuild_ks_matrix 107 8.3 0.001 0.001 40.449 40.449 qs_ks_build_kohn_sham_matrix 107 9.3 0.014 0.014 40.449 40.449 qs_rho_update_rho_low 107 7.7 0.001 0.001 37.567 37.567 calculate_rho_elec 107 8.7 0.927 0.927 37.566 37.566 qs_ks_update_qs_env 107 7.6 0.001 0.001 36.336 36.336 grid_collocate_task_list 107 9.7 32.874 32.874 32.874 32.874 sum_up_and_integrate 107 10.3 0.807 0.807 32.217 32.217 integrate_v_rspace 107 11.3 0.109 0.109 31.409 31.409 grid_integrate_task_list 107 12.3 28.979 28.979 28.979 28.979 dbcsr_multiply_generic 1966 12.4 0.160 0.160 21.086 21.086 init_scf_loop 11 6.9 0.000 0.000 20.717 20.717 qs_scf_new_mos 96 7.5 0.001 0.001 20.369 20.369 qs_scf_loop_do_ot 96 8.5 0.001 0.001 20.369 20.369 ot_scf_mini 96 9.5 0.003 0.003 18.861 18.861 prepare_preconditioner 11 7.9 0.000 0.000 16.012 16.012 make_preconditioner 11 8.9 0.000 0.000 16.012 16.012 make_full_inverse_cholesky 11 9.9 0.027 0.027 14.727 14.727 ot_mini 96 10.5 0.001 0.001 11.876 11.876 make_m2s 3932 13.4 0.046 0.046 10.997 10.997 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.489 7.489 pw_transfer 1295 11.6 0.074 0.074 6.956 6.956 fft_wrap_pw1pw2 1081 12.6 0.007 0.007 6.696 6.696 init_scf_run 11 5.9 0.002 0.002 6.406 6.406 scf_env_initial_rho_setup 11 6.9 0.001 0.001 6.403 6.403 qs_ot_get_derivative 96 11.5 0.001 0.001 6.355 6.355 multiply_cannon 1966 13.4 0.231 0.231 6.312 6.312 fft_wrap_pw1pw2_140 439 13.2 0.578 0.578 5.776 5.776 multiply_cannon_loop 1966 14.4 0.069 0.069 5.700 5.700 wfi_extrapolate 11 7.9 0.001 0.001 5.695 5.695 make_images 3932 14.4 2.239 2.239 5.674 5.674 multiply_cannon_multrec 1966 15.4 5.578 5.578 5.630 5.630 cp_fm_cholesky_decompose 22 10.9 5.620 5.620 5.620 5.620 dbcsr_make_dense_low 4961 15.5 0.102 0.102 5.553 5.553 ot_diis_step 96 11.5 0.004 0.004 5.518 5.518 make_dense_data 4961 16.5 4.895 4.895 5.438 5.438 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.180 5.180 apply_single 107 13.6 0.000 0.000 5.180 5.180 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 5.034 5.034 dbcsr_make_images_dense 3386 14.7 0.016 0.016 4.908 4.908 cp_fm_cholesky_invert 11 10.9 4.727 4.727 4.727 4.727 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.245 4.245 dbcsr_complete_redistribute 317 12.2 2.061 2.061 3.982 3.982 dbcsr_copy 1855 11.9 0.276 0.276 3.865 3.865 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.834 3.834 qs_create_task_list 11 7.9 0.000 0.000 3.815 3.815 generate_qs_task_list 11 8.9 2.420 2.420 3.815 3.815 density_rs2pw 107 9.7 0.005 0.005 3.764 3.764 dbcsr_copy_into_existing 22 7.9 3.502 3.502 3.503 3.503 qs_ot_get_p 107 10.4 0.001 0.001 3.426 3.426 fft3d_s 1082 14.6 3.260 3.260 3.266 3.266 copy_dbcsr_to_fm 147 11.2 0.003 0.003 3.261 3.261 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.806 2.806 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.025 88.897 88.907 qs_mol_dyn_low 1 2.0 0.004 0.007 88.744 88.749 qs_forces 11 3.9 0.002 0.002 88.697 88.698 qs_energies 11 4.9 0.001 0.001 82.733 82.737 scf_env_do_scf 11 5.9 0.001 0.002 76.410 76.413 scf_env_do_scf_inner_loop 96 6.5 0.003 0.021 70.653 70.654 velocity_verlet 10 3.0 0.001 0.003 53.250 53.252 rebuild_ks_matrix 107 8.3 0.001 0.001 38.502 38.565 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.019 38.502 38.564 qs_ks_update_qs_env 107 7.6 0.001 0.001 33.976 34.031 sum_up_and_integrate 107 10.3 0.052 0.058 33.471 33.497 integrate_v_rspace 107 11.3 0.005 0.005 33.419 33.446 qs_rho_update_rho_low 107 7.7 0.001 0.001 32.517 32.525 calculate_rho_elec 107 8.7 0.031 0.033 32.516 32.524 grid_integrate_task_list 107 12.3 27.578 28.090 27.578 28.090 grid_collocate_task_list 107 9.7 26.398 27.096 26.398 27.096 dbcsr_multiply_generic 1966 12.4 0.085 0.092 14.468 14.599 qs_scf_new_mos 96 7.5 0.001 0.001 11.406 11.476 qs_scf_loop_do_ot 96 8.5 0.001 0.001 11.405 11.475 ot_scf_mini 96 9.5 0.002 0.003 10.697 10.770 multiply_cannon 1966 13.4 0.148 0.164 10.309 10.628 multiply_cannon_loop 1966 14.4 0.101 0.110 9.600 9.852 mp_waitall_1 136719 16.5 7.633 7.937 7.633 7.937 rs_pw_transfer 878 11.9 0.014 0.017 5.658 6.433 ot_mini 96 10.5 0.001 0.001 6.178 6.247 density_rs2pw 107 9.7 0.006 0.006 5.362 6.151 multiply_cannon_metrocomm3 15728 15.4 0.045 0.048 5.768 6.013 init_scf_loop 11 6.9 0.000 0.000 5.735 5.735 pw_transfer 1295 11.6 0.073 0.079 5.264 5.324 fft_wrap_pw1pw2 1081 12.6 0.010 0.010 5.106 5.166 init_scf_run 11 5.9 0.000 0.005 4.930 4.930 scf_env_initial_rho_setup 11 6.9 0.000 0.004 4.929 4.930 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.714 4.723 wfi_extrapolate 11 7.9 0.001 0.002 4.495 4.496 fft_wrap_pw1pw2_140 439 13.2 0.976 1.014 4.327 4.415 potential_pw2rs 107 12.3 0.007 0.008 4.132 4.142 multiply_cannon_multrec 15728 15.4 3.453 3.644 3.465 3.657 mp_waitany 8968 13.7 2.424 3.214 2.424 3.214 qs_ot_get_derivative 96 11.5 0.001 0.001 3.116 3.187 fft3d_ps 1081 14.6 1.553 1.649 3.047 3.139 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.052 3.112 apply_single 107 13.6 0.000 0.001 3.052 3.112 ot_diis_step 96 11.5 0.004 0.004 3.041 3.041 make_m2s 3932 13.4 0.048 0.052 2.882 2.941 mp_alltoall_d11v 1998 13.7 1.940 2.767 1.940 2.767 rs_pw_transfer_RS2PW_140 118 11.5 0.279 0.302 1.969 2.750 make_images 3932 14.4 0.122 0.126 2.512 2.569 rs_gather_matrices 107 12.3 0.153 0.179 1.625 2.440 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=53.35499999999999, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=32.874, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=28.979, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=5.62, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.578, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.895, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=21.411, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=26.398, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.578, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.453, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.424, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.633, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.228 0.228 126.320 126.320 qs_energies 1 2.0 0.000 0.000 125.391 125.391 scf_env_do_scf 1 3.0 0.000 0.000 124.087 124.087 qs_ks_update_qs_env 8 5.0 0.000 0.000 117.895 117.895 rebuild_ks_matrix 7 6.0 0.000 0.000 117.836 117.836 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 117.836 117.836 hfx_ks_matrix 7 8.0 0.000 0.000 108.154 108.154 integrate_four_center 7 9.0 2.630 2.630 108.065 108.065 integrate_four_center_main 7 10.0 0.787 0.787 98.404 98.404 integrate_four_center_bin 448 11.0 97.617 97.617 97.617 97.617 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 65.955 65.955 init_scf_loop 1 4.0 0.000 0.000 58.122 58.122 integrate_four_center_load 7 10.0 0.001 0.001 6.677 6.677 hfx_load_balance 1 11.0 0.001 0.001 6.676 6.676 qs_vxc_create 14 8.0 0.000 0.000 3.488 3.488 xc_vxc_pw_create 14 9.0 0.165 0.165 3.487 3.487 hfx_load_balance_count 1 12.0 3.337 3.337 3.337 3.337 hfx_load_balance_bin 1 12.0 3.321 3.321 3.321 3.321 prepare_preconditioner 1 5.0 0.000 0.000 3.093 3.093 make_preconditioner 1 6.0 0.000 0.000 3.093 3.093 calculate_rho_elec 15 7.4 0.125 0.125 2.811 2.811 xc_rho_set_and_dset_create 14 10.0 0.115 0.115 2.643 2.643 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.221 0.242 115.481 115.492 qs_energies 1 2.0 0.000 0.000 115.080 115.088 scf_env_do_scf 1 3.0 0.000 0.000 114.676 114.676 qs_ks_update_qs_env 8 5.0 0.000 0.000 112.181 112.182 rebuild_ks_matrix 7 6.0 0.000 0.000 112.168 112.169 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 112.168 112.169 hfx_ks_matrix 7 8.0 0.000 0.000 105.313 105.316 integrate_four_center 7 9.0 0.058 0.383 105.302 105.304 integrate_four_center_main 7 10.0 0.004 0.004 94.890 96.583 integrate_four_center_bin 448 11.0 94.886 96.579 94.886 96.579 scf_env_do_scf_inner_loop 7 4.0 0.000 0.002 64.135 64.135 init_scf_loop 1 4.0 0.000 0.000 50.539 50.539 integrate_four_center_load 7 10.0 0.000 0.000 6.669 6.670 hfx_load_balance 1 11.0 0.001 0.001 6.669 6.670 mp_sync 56 11.2 2.984 4.241 2.984 4.241 hfx_load_balance_count 1 12.0 3.245 3.334 3.245 3.334 hfx_load_balance_bin 1 12.0 3.237 3.332 3.237 3.332 qs_vxc_create 14 8.0 0.000 0.001 3.025 3.025 xc_vxc_pw_create 14 9.0 0.009 0.011 3.024 3.024 xc_rho_set_and_dset_create 14 10.0 0.015 0.018 2.313 2.437 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=18.39999999999999, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=97.617, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.337, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.321, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.63, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.787, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.228, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=10.84599999999999, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=94.886, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.245, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.237, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.058, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.221, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=2.984, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 86.688 86.688 qs_energies 1 2.0 0.000 0.000 86.251 86.251 mp2_main 1 3.0 0.000 0.000 83.166 83.166 mp2_gpw_main 1 4.0 0.000 0.000 83.054 83.054 rpa_ri_compute_en 1 5.0 0.000 0.000 79.080 79.080 rpa_num_int 1 6.0 0.001 0.001 79.074 79.074 compute_mat_P_omega 1 7.0 0.004 0.004 67.850 67.850 compute_mat_P_omega_contract 10 8.0 9.608 9.608 67.626 67.626 dbt_total 2336 9.6 0.017 0.017 53.111 53.111 dbt_contract 787 11.0 0.073 0.073 45.166 45.166 dbt_tas_total 1149 12.2 0.298 0.298 43.468 43.468 dbt_tas_multiply 807 12.1 0.002 0.002 42.097 42.097 dbt_tas_dbm 807 14.1 0.004 0.004 35.000 35.000 dbm_multiply 807 16.1 34.989 34.989 34.989 34.989 dbt_tas_mm_1N 524 15.1 0.002 0.002 25.428 25.428 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 23.731 23.731 compute_mat_P_omega_calc_M_occ 250 9.0 9.657 9.657 18.826 18.826 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.029 9.029 dbt_tas_mm_2 251 15.0 0.002 0.002 7.829 7.829 dbt_copy 1103 10.7 0.103 0.103 6.552 6.552 compute_QP_energies 1 7.0 0.000 0.000 5.854 5.854 compute_self_energy_cubic_gw 1 8.0 0.059 0.059 5.853 5.853 contract_cubic_gw 21 9.0 0.000 0.000 4.739 4.739 dbt_tas_reserve_blocks_index 3261 14.3 0.505 0.505 4.356 4.356 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.967 3.967 dbm_reserve_blocks 3628 15.3 3.921 3.921 3.921 3.921 dbt_reserve_blocks_index 2280 13.1 0.069 0.069 3.341 3.341 dbt_reserve_blocks_index_array 2222 12.2 0.011 0.011 3.285 3.285 scf_env_do_scf 1 3.0 0.000 0.000 2.970 2.970 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.970 2.970 dbt_crop 1042 12.0 1.729 1.729 2.908 2.908 dbt_tas_copy 574 11.4 1.536 1.536 2.623 2.623 convert_to_new_pgrid 2421 14.1 0.143 0.143 2.472 2.472 dbm_copy 1614 15.1 2.330 2.330 2.330 2.330 dbt_reshape 278 11.9 1.220 1.220 2.232 2.232 compute_W_cubic_GW 10 7.0 0.005 0.005 2.219 2.219 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.204 2.204 dbt_tas_reshape 367 15.0 0.021 0.021 2.143 2.143 get_2c_integrals 1 6.0 0.000 0.000 1.929 1.929 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.830 1.830 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.026 37.757 37.772 qs_energies 1 2.0 0.000 0.000 37.619 37.621 mp2_main 1 3.0 0.000 0.000 36.451 36.453 mp2_gpw_main 1 4.0 0.000 0.001 36.410 36.412 rpa_ri_compute_en 1 5.0 0.000 0.000 34.980 34.982 rpa_num_int 1 6.0 0.000 0.002 34.980 34.981 dbt_total 2336 9.6 0.018 0.018 30.879 30.886 compute_mat_P_omega 1 7.0 0.001 0.006 29.834 29.852 compute_mat_P_omega_contract 10 8.0 0.453 0.477 29.671 29.675 dbt_contract 787 11.0 0.045 0.046 22.780 22.790 dbt_tas_total 1149 12.2 0.092 0.098 20.127 20.128 dbt_tas_multiply 807 12.1 0.002 0.003 20.032 20.033 dbt_tas_dbm 807 14.1 0.003 0.004 14.627 14.638 dbm_multiply 807 16.1 11.763 12.612 11.763 12.612 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 8.863 8.864 compute_mat_P_omega_calc_M_occ 250 9.0 0.430 0.455 8.622 8.623 dbt_copy 1149 10.8 0.016 0.017 7.018 7.160 dbt_tas_mm_2 251 15.0 0.002 0.002 7.068 7.071 dbt_reshape 1136 11.8 2.981 3.144 6.677 6.814 mp_sync 8688 11.6 5.224 6.155 5.224 6.155 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 6.043 6.043 dbt_tas_mm_1N 524 15.1 0.002 0.002 5.191 5.531 compute_QP_energies 1 7.0 0.000 0.000 3.212 3.213 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 3.210 3.211 mp_waitall_2 3812 15.3 2.728 2.947 2.728 2.947 dbt_communicate_buffer 1136 12.8 0.057 0.062 2.704 2.834 contract_cubic_gw 21 9.0 0.000 0.000 2.501 2.501 dbt_reserve_blocks_index 2887 13.1 0.075 0.081 2.027 2.167 dbt_reserve_blocks_index_array 2829 12.2 0.009 0.009 2.017 2.154 dbt_tas_reserve_blocks_index 3347 14.5 0.466 0.500 2.008 2.147 dbt_crop 1042 12.0 1.004 1.100 1.637 1.779 dbm_reserve_blocks 3752 15.4 1.647 1.761 1.647 1.761 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.428 1.429 dbt_tas_replicate 405 14.1 0.592 0.772 1.232 1.299 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.234 1.239 parallel_gemm_fm 105 8.4 0.000 0.000 1.147 1.156 parallel_gemm_fm_cosma 105 9.4 1.146 1.156 1.146 1.156 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.130 1.133 scf_env_do_scf 1 3.0 0.000 0.000 1.116 1.116 scf_env_do_scf_inner_loop 17 4.0 0.001 0.003 1.116 1.116 convert_to_new_pgrid 2421 14.1 0.028 0.034 0.927 0.990 dbm_copy 1608 15.1 0.891 0.958 0.891 0.958 mp_max_i 2002 9.8 0.731 0.921 0.731 0.921 compute_W_cubic_GW 10 7.0 0.001 0.001 0.885 0.891 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=24.963, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=34.989, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=9.657, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=9.608, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.921, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.33, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.22, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=11.639999999999997, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=11.763, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.43, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.453, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.647, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.891, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.981, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.224, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.728, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.020 279.473 279.473 qs_forces 1 2.0 0.000 0.000 278.836 278.836 rebuild_ks_matrix 7 6.6 0.000 0.000 277.255 277.255 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 277.255 277.255 hfx_ks_matrix 7 8.6 0.000 0.000 275.096 275.096 hfx_ri_update_ks 7 9.6 0.000 0.000 227.779 227.779 hfx_ri_update_ks_Pmat 7 10.6 38.315 38.315 227.773 227.773 dbt_total 809 11.0 0.008 0.008 211.899 211.899 qs_energies 1 3.0 0.000 0.000 201.202 201.202 scf_env_do_scf 1 4.0 0.000 0.000 200.854 200.854 qs_ks_update_qs_env 8 6.0 0.000 0.000 199.679 199.679 dbt_contract 207 12.4 0.059 0.059 189.299 189.299 dbt_tas_total 343 13.7 2.251 2.251 186.061 186.061 dbt_tas_multiply 216 13.5 0.001 0.001 182.338 182.338 dbt_tas_dbm 216 15.5 0.001 0.001 167.657 167.657 dbm_multiply 216 17.5 167.653 167.653 167.653 167.653 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 160.687 160.687 dbt_tas_mm_2 91 16.5 0.001 0.001 150.321 150.321 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 128.361 128.361 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 77.581 77.581 init_scf_loop 2 5.0 0.000 0.000 72.490 72.490 hfx_ri_update_forces 1 7.0 1.718 1.718 47.313 47.313 hfx_ri_forces_Pmat_3c 1 8.0 4.965 4.965 27.393 27.393 dbt_copy 409 11.7 0.114 0.114 18.866 18.866 precalc_derivatives 1 8.0 2.319 2.319 15.171 15.171 dbt_reshape 132 13.2 7.963 7.963 12.768 12.768 dbt_tas_mm_3T 77 17.1 0.000 0.000 12.172 12.172 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 10.487 10.487 dbt_tas_reserve_blocks_index 1255 15.4 1.145 1.145 9.900 9.900 dbm_reserve_blocks 1397 16.3 8.974 8.974 8.974 8.974 build_3c_derivatives 3 9.0 3.342 3.342 8.807 8.807 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 8.622 8.622 dbt_reserve_blocks_index 818 14.4 0.138 0.138 7.669 7.669 dbt_reserve_blocks_index_array 795 13.4 0.008 0.008 7.547 7.547 dbt_crop 372 13.7 3.706 3.706 6.469 6.469 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.027 59.546 59.556 qs_forces 1 2.0 0.000 0.000 59.373 59.374 rebuild_ks_matrix 7 6.6 0.000 0.000 58.356 58.357 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 58.356 58.357 hfx_ks_matrix 7 8.6 0.000 0.001 57.064 57.074 dbt_total 809 11.0 0.007 0.009 51.019 51.032 dbt_contract 207 12.4 0.030 0.031 39.552 39.563 dbt_tas_total 343 13.7 0.118 0.317 35.319 35.322 dbt_tas_multiply 216 13.5 0.001 0.001 34.929 34.931 hfx_ri_update_ks 7 9.6 0.000 0.000 33.108 33.109 hfx_ri_update_ks_Pmat 7 10.6 1.460 1.543 33.102 33.104 qs_energies 1 3.0 0.000 0.000 31.273 31.273 scf_env_do_scf 1 4.0 0.000 0.001 31.095 31.095 qs_ks_update_qs_env 8 6.0 0.000 0.000 30.269 30.270 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 28.089 28.089 dbt_tas_dbm 216 15.5 0.001 0.001 27.284 27.289 dbm_multiply 216 17.5 25.045 25.675 25.045 25.675 hfx_ri_update_forces 1 7.0 0.071 0.077 23.955 23.965 hfx_ri_forces_Pmat_3c 1 8.0 0.204 0.224 18.125 18.125 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 17.574 17.574 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 14.992 14.992 init_scf_loop 2 5.0 0.000 0.000 13.520 13.520 dbt_tas_mm_2 91 16.5 0.001 0.001 12.502 12.507 dbt_copy 497 12.3 0.014 0.016 10.547 10.702 dbt_reshape 365 13.6 4.302 4.386 8.185 8.325 dbt_tas_mm_3T 77 17.1 0.000 0.000 7.614 7.957 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 6.215 6.216 mp_sync 2769 12.9 5.250 6.037 5.250 6.037 dbt_tas_mm_3N 37 15.4 0.000 0.000 5.371 5.485 precalc_derivatives 1 8.0 0.094 0.100 4.341 4.341 dbt_tas_reserve_blocks_index 1380 15.8 1.013 1.048 3.784 4.018 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.897 3.897 mp_waitall_2 1234 16.4 3.588 3.680 3.588 3.680 dbt_reserve_blocks_index 1051 14.7 0.133 0.142 3.150 3.345 dbt_reserve_blocks_index_array 1028 13.8 0.006 0.007 3.120 3.311 dbm_reserve_blocks 1529 16.7 3.049 3.236 3.049 3.236 dbt_crop 372 13.7 2.068 2.129 3.005 3.110 dbt_communicate_buffer 365 14.6 0.014 0.014 2.551 2.626 build_3c_derivatives 3 9.0 0.262 0.286 2.406 2.417 dbt_tas_replicate 149 15.4 0.823 0.861 2.098 2.151 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 2.109 2.110 convert_to_new_pgrid 648 15.5 0.039 0.075 1.719 1.956 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.941 1.945 dbm_copy 452 16.3 1.527 1.759 1.527 1.759 dbt_tas_copy 132 12.5 0.788 0.818 1.484 1.579 dbt_tas_communicate_buffer 328 16.8 0.012 0.012 1.169 1.208 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=51.60300000000004, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=167.653, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=38.315, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=8.974, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=7.963, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.965, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=16.647999999999996, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=25.045, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.46, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.049, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=4.302, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.204, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=5.25, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.588, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 189.274 189.274 qs_energies 1 2.0 0.000 0.000 189.059 189.059 mp2_main 1 3.0 0.000 0.000 183.712 183.712 mp2_gpw_main 1 4.0 0.001 0.001 183.294 183.294 mp2_ri_gpw_compute_in 1 5.0 0.401 0.401 133.015 133.015 mp2_ri_gpw_compute_in_loop 1 6.0 0.013 0.013 122.756 122.756 mp2_eri_3c_integrate_gpw 2656 7.0 0.020 0.020 91.214 91.214 integrate_v_rspace 2666 8.0 0.688 0.688 76.687 76.687 grid_integrate_task_list 2666 9.0 73.890 73.890 73.890 73.890 mp2_ri_gpw_compute_en 1 5.0 0.095 0.095 50.252 50.252 mp2_ri_gpw_compute_en_RI_loop 1 6.0 11.031 11.031 48.197 48.197 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.729 2.729 27.829 27.829 local_gemm 2080 8.0 25.100 25.100 25.100 25.100 dbcsr_multiply_generic 5322 8.0 0.223 0.223 24.518 24.518 ao_to_mo_and_store_B_mult_1 2656 7.0 0.013 0.013 24.464 24.464 pw_transfer 63872 10.6 1.058 1.058 13.340 13.340 multiply_cannon 5322 9.0 0.521 0.521 13.296 13.296 calculate_wavefunction 2656 8.0 8.855 8.855 13.206 13.206 fft_wrap_pw1pw2 53228 11.4 0.114 0.114 12.034 12.034 multiply_cannon_loop 5322 10.0 0.226 0.226 11.759 11.759 multiply_cannon_multrec 5322 11.0 9.949 9.949 9.995 9.995 get_2c_integrals 1 6.0 0.000 0.000 9.857 9.857 compute_2c_integrals 1 7.0 0.006 0.006 8.833 8.833 compute_2c_integrals_loop_lm 1 8.0 0.013 0.013 8.820 8.820 mp2_eri_2c_integrate_gpw 1 9.0 0.948 0.948 8.808 8.808 make_m2s 10644 9.0 0.069 0.069 8.795 8.795 make_images 10644 10.0 3.371 3.371 8.424 8.424 fft_wrap_pw1pw2_20 21271 12.4 0.559 0.559 8.162 8.162 fft3d_s 53229 13.4 7.418 7.418 7.455 7.455 mp2_ri_gpw_compute_en_ener 2080 7.0 7.101 7.101 7.101 7.101 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.861 2.861 6.950 6.950 scf_env_do_scf 1 3.0 0.000 0.000 4.899 4.899 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.899 4.899 copy_dbcsr_to_fm 2679 8.0 0.033 0.033 4.653 4.653 potential_pw2rs 5322 10.0 0.157 0.157 4.476 4.476 collocate_single_gaussian 2656 10.0 0.149 0.149 3.888 3.888 mp2_eri_2c_integrate_gpw_pot_l 2656 10.0 0.004 0.004 3.856 3.856 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.027 47.318 47.329 qs_energies 1 2.0 0.000 0.001 46.849 46.849 mp2_main 1 3.0 0.000 0.001 44.566 44.567 mp2_gpw_main 1 4.0 0.001 0.002 44.462 44.463 mp2_ri_gpw_compute_en 1 5.0 0.261 0.270 23.809 24.173 mp2_ri_gpw_compute_en_RI_loop 1 6.0 4.970 5.210 22.336 22.347 mp2_ri_gpw_compute_in 1 5.0 0.045 0.046 20.563 21.157 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 19.059 19.647 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 16.537 17.004 integrate_v_rspace 93 8.1 0.123 0.149 16.395 16.849 grid_integrate_task_list 93 9.1 16.023 16.451 16.023 16.451 mp2_ri_gpw_compute_en_expansio 65 7.0 0.194 0.212 12.768 12.974 local_gemm 65 8.0 12.575 12.785 12.575 12.785 mp2_ri_gpw_compute_en_comm 30 7.0 0.110 0.121 4.090 4.458 mp_sendrecv_dm3 1860 8.0 3.061 3.443 3.061 3.443 dbcsr_multiply_generic 176 8.0 0.010 0.022 2.120 2.386 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 2.101 2.369 scf_env_do_scf 1 3.0 0.000 0.000 2.132 2.133 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 2.132 2.133 get_2c_integrals 1 6.0 0.004 0.010 1.433 1.461 multiply_cannon 176 9.0 0.019 0.022 1.246 1.331 multiply_cannon_loop 176 10.0 0.002 0.002 1.176 1.259 multiply_cannon_multrec 246 11.0 1.011 1.085 1.017 1.092 compute_2c_integrals 1 7.0 0.004 0.004 1.037 1.055 fill_local_i_aL 1920 8.0 0.983 1.054 0.983 1.054 make_m2s 352 9.0 0.003 0.005 0.829 1.027 make_images 352 10.0 0.061 0.070 0.814 1.012 qs_scf_new_mos 10 5.0 0.000 0.000 0.992 1.006 mp_min_d 2 7.0 0.613 0.991 0.613 0.991 mp2_ri_get_integ_group_size 1 6.0 0.000 0.000 0.588 0.952 compute_2c_integrals_loop_lm 1 8.0 0.002 0.003 0.921 0.951 mp2_eri_2c_integrate_gpw 1 9.0 0.236 0.242 0.919 0.950 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=60.448999999999984, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=73.89, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=25.1, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=11.031, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=9.949, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=8.855, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=9.677999999999997, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=16.023, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=12.575, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=4.97, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=1.011, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=3.061, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.101 0.101 141.040 141.040 qs_energies 1 2.0 0.000 0.000 139.496 139.496 scf_env_do_scf 1 3.0 0.000 0.000 130.502 130.502 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 130.502 130.502 qs_ks_update_qs_env 15 5.0 0.000 0.000 56.357 56.357 rebuild_ks_matrix 15 6.0 0.000 0.000 56.141 56.141 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 56.141 56.141 qs_scf_new_mos 15 5.0 0.000 0.000 47.042 47.042 eigensolver 15 6.0 0.002 0.002 38.771 38.771 qs_vxc_create 15 8.0 0.041 0.041 37.914 37.914 calculate_dispersion_nonloc 15 9.0 7.091 7.091 32.922 32.922 pw_transfer 1191 10.0 0.077 0.077 26.846 26.846 fft_wrap_pw1pw2 1086 11.0 0.011 0.011 26.628 26.628 cp_fm_diag_elpa 15 7.0 0.000 0.000 24.899 24.899 cp_fm_diag_elpa_base 15 8.0 22.311 22.311 24.899 24.899 qs_rho_update_rho_low 16 5.0 0.000 0.000 23.433 23.433 calculate_rho_elec 16 6.0 0.220 0.220 23.433 23.433 grid_collocate_task_list 16 7.0 21.977 21.977 21.977 21.977 fft_wrap_pw1pw2_150 765 12.0 4.082 4.082 19.522 19.522 sum_up_and_integrate 15 8.0 0.160 0.160 16.950 16.950 integrate_v_rspace 15 9.0 0.022 0.022 16.790 16.790 grid_integrate_task_list 15 10.0 16.058 16.058 16.058 16.058 fft3d_s 1087 13.0 12.288 12.288 12.297 12.297 cp_fm_cholesky_restore 45 7.0 11.458 11.458 11.458 11.458 pw_scatter_s 585 13.1 7.915 7.915 7.915 7.915 fft_wrap_pw1pw2_200 197 12.3 0.877 0.877 6.862 6.862 copy_dbcsr_to_fm 16 5.9 0.001 0.001 6.026 6.026 dbcsr_complete_redistribute 46 8.3 2.410 2.410 5.812 5.812 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 5.083 5.083 cp_fm_upper_to_full 30 8.0 5.000 5.000 5.000 5.000 xc_vxc_pw_create 15 9.0 0.224 0.224 4.951 4.951 gspace_mixing 14 5.0 0.172 0.172 4.948 4.948 vdW_energy 15 10.0 4.696 4.696 4.696 4.696 broyden_mixing 14 6.0 4.251 4.251 4.252 4.252 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 3.887 3.887 init_scf_run 1 3.0 0.000 0.000 3.427 3.427 xc_pw_derive 90 11.0 0.001 0.001 3.321 3.321 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.028 71.989 71.999 qs_energies 1 2.0 0.000 0.004 71.644 71.649 scf_env_do_scf 1 3.0 0.000 0.001 66.737 66.738 scf_env_do_scf_inner_loop 15 4.0 0.001 0.004 66.737 66.738 qs_ks_update_qs_env 15 5.0 0.000 0.000 30.675 30.696 rebuild_ks_matrix 15 6.0 0.000 0.000 30.633 30.653 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.004 30.633 30.653 qs_rho_update_rho_low 16 5.0 0.000 0.000 21.966 21.974 calculate_rho_elec 16 6.0 0.007 0.008 21.966 21.974 grid_collocate_task_list 16 7.0 20.479 20.701 20.479 20.701 sum_up_and_integrate 15 8.0 0.019 0.024 16.763 16.820 integrate_v_rspace 15 9.0 0.001 0.001 16.743 16.803 grid_integrate_task_list 15 10.0 15.659 15.838 15.659 15.838 qs_scf_new_mos 15 5.0 0.000 0.001 14.607 14.737 qs_vxc_create 15 8.0 0.001 0.001 13.438 13.449 eigensolver 15 6.0 0.002 0.002 13.315 13.354 pw_transfer 1191 10.0 0.099 0.107 10.993 11.045 fft_wrap_pw1pw2 1086 11.0 0.016 0.017 10.774 10.830 calculate_dispersion_nonloc 15 9.0 1.049 1.080 10.784 10.807 cp_fm_diag_elpa 15 7.0 0.000 0.000 9.222 9.226 cp_fm_diag_elpa_base 15 8.0 9.052 9.063 9.218 9.221 fft3d_ps 1086 13.0 3.608 3.910 7.146 7.443 fft_wrap_pw1pw2_150 765 12.0 1.068 1.179 7.098 7.126 cp_fm_cholesky_restore 45 7.0 3.889 3.964 3.889 3.964 fft_wrap_pw1pw2_200 197 12.3 0.689 0.764 3.523 3.569 mp_alltoall_z22v 1086 15.0 2.772 3.463 2.772 3.463 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.134 3.134 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.786 3.018 xc_vxc_pw_create 15 9.0 0.019 0.024 2.654 2.697 x_to_yz 585 14.1 0.440 0.454 1.846 2.097 yz_to_x 501 13.9 0.293 0.320 1.659 2.053 xc_pw_derive 90 11.0 0.001 0.001 1.855 1.951 build_core_ppnl 1 5.0 1.733 1.876 1.733 1.876 vdW_energy 15 10.0 1.573 1.623 1.573 1.623 init_scf_run 1 3.0 0.000 0.001 1.513 1.514 rs_pw_transfer 158 9.4 0.002 0.002 1.117 1.468 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=56.94799999999999, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=22.311, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.977, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=16.058, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=12.288, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.458, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=19.302000000000007, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=9.052, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.479, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=15.659, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.889, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=3.608, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.110 0.110 299.633 299.633 qs_energies 1 2.0 0.000 0.000 299.458 299.458 ls_scf 1 3.0 0.000 0.000 298.151 298.151 ls_scf_main 1 4.0 0.002 0.002 288.023 288.023 density_matrix_trs4 11 5.0 0.014 0.014 186.054 186.054 ls_scf_dm_to_ks 11 5.0 0.000 0.000 95.532 95.532 matrix_ls_to_qs 11 6.0 0.000 0.000 91.841 91.841 dbcsr_multiply_generic 185 6.1 0.957 0.957 88.175 88.175 arnoldi_extremal 12 6.1 0.000 0.000 81.910 81.910 arnoldi_normal_ev 12 7.1 0.053 0.053 81.909 81.909 build_subspace 23 8.1 0.091 0.091 80.440 80.440 dbcsr_matrix_vector_mult 652 9.0 0.305 0.305 80.174 80.174 dbcsr_matrix_vector_mult_local 652 10.0 77.987 77.987 77.997 77.997 multiply_cannon 185 7.1 0.505 0.505 50.637 50.637 dbcsr_copy_into_existing 11 7.0 48.603 48.603 48.603 48.603 dbcsr_complete_redistribute 23 7.5 33.943 33.943 47.718 47.718 matrix_decluster 11 7.0 0.000 0.000 43.237 43.237 multiply_cannon_loop 185 8.1 0.280 0.280 36.986 36.986 make_m2s 370 7.1 0.041 0.041 31.487 31.487 make_images 370 8.1 13.240 13.240 28.624 28.624 multiply_cannon_multrec 185 9.1 27.019 27.019 27.201 27.201 dbcsr_finalize 646 7.5 0.376 0.376 18.812 18.812 dbcsr_merge_all 597 8.5 2.966 2.966 17.125 17.125 setup_rec_index_2d 370 8.1 12.919 12.919 12.919 12.919 dbcsr_sort_indices 1103 9.9 12.803 12.803 12.803 12.803 tree_to_linear_d 110 9.4 12.399 12.399 12.399 12.399 quick_finalize 395 10.0 0.536 0.536 11.169 11.169 dbcsr_special_finalize 370 9.1 0.002 0.002 10.328 10.328 calculate_norms 370 9.1 9.504 9.504 9.504 9.504 ls_scf_init_scf 1 4.0 0.000 0.000 9.327 9.327 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.843 8.843 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 8.103 8.103 dbcsr_dot_sd 144 6.3 7.277 7.277 7.278 7.278 matrix_qs_to_ls 12 5.1 0.000 0.000 6.611 6.611 matrix_cluster 12 6.1 0.000 0.000 6.611 6.611 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.025 88.312 88.324 qs_energies 1 2.0 0.000 0.000 88.186 88.187 ls_scf 1 3.0 0.000 0.000 88.126 88.127 ls_scf_main 1 4.0 0.001 0.009 84.765 84.766 density_matrix_trs4 11 5.0 0.007 0.021 81.664 81.790 dbcsr_multiply_generic 185 6.1 0.072 0.083 77.606 77.945 multiply_cannon 185 7.1 0.041 0.044 64.402 64.993 multiply_cannon_loop 185 8.1 0.158 0.168 61.214 62.490 multiply_cannon_multrec 1480 9.1 37.036 38.683 37.357 39.017 mp_waitall_1 11936 10.3 21.018 24.688 21.018 24.688 multiply_cannon_metrocomm3 1480 9.1 0.019 0.021 16.138 19.816 make_m2s 370 7.1 0.040 0.042 8.836 8.915 make_images 370 8.1 0.727 0.758 8.681 8.762 calculate_norms 2960 9.1 6.334 6.637 6.334 6.637 make_images_data 370 9.1 0.011 0.012 4.084 4.346 mp_sum_l 1199 5.3 3.305 3.885 3.305 3.885 hybrid_alltoall_any 393 9.9 0.325 1.958 3.658 3.858 arnoldi_extremal 12 6.1 0.000 0.001 3.160 3.176 arnoldi_normal_ev 12 7.1 0.002 0.009 3.160 3.176 build_subspace 23 8.1 0.027 0.033 3.047 3.049 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.001 2.335 2.775 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.650 2.733 ls_scf_init_scf 1 4.0 0.000 0.000 2.578 2.578 dbcsr_matrix_vector_mult 652 9.0 0.017 0.060 2.487 2.570 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.537 2.545 dbcsr_complete_redistribute 23 7.5 1.433 1.506 2.435 2.534 matrix_ls_to_qs 11 6.0 0.000 0.000 2.319 2.420 make_images_pack 370 9.1 2.074 2.322 2.078 2.326 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 2.305 2.308 matrix_decluster 11 7.0 0.000 0.000 2.176 2.278 dbcsr_matrix_vector_mult_local 652 10.0 1.982 2.077 1.984 2.080 buffer_matrices_ensure_size 370 8.1 1.906 2.024 1.906 2.024 multiply_cannon_metrocomm1 1480 9.1 0.007 0.009 1.181 1.965 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=89.33699999999996, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=77.987, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=48.603, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=33.943, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=27.019, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images", label="make_images", y=13.24, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.504, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=14.402999999999992, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.982, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.433, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=37.036, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images", label="make_images", y=0.727, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=6.334, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=21.018, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=2.074, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.305, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.008 81.087 81.087 lib_test 1 2.0 0.000 0.000 81.078 81.078 dbcsr_run_tests 3 3.0 0.002 0.002 81.078 81.078 test_multiplies_multiproc 3 4.0 0.001 0.001 64.726 64.726 dbcsr_redistribute 9 5.0 42.666 42.666 44.327 44.327 dbcsr_multiply_generic 9 5.0 0.001 0.001 18.972 18.972 dbcsr_make_random_matrix 9 4.0 12.908 12.908 16.236 16.236 multiply_cannon 9 6.0 0.003 0.003 13.599 13.599 multiply_cannon_loop 9 7.0 0.021 0.021 13.219 13.219 multiply_cannon_multrec 9 8.0 13.196 13.196 13.198 13.198 dbcsr_finalize 27 5.7 0.041 0.041 6.174 6.174 dbcsr_merge_all 18 6.5 2.258 2.258 5.369 5.369 dbcsr_data_release 975 7.6 2.921 2.921 2.921 2.921 tree_to_linear_d 9 7.0 2.033 2.033 2.033 2.033 make_m2s 18 6.0 0.001 0.001 1.869 1.869 make_images 18 7.0 0.633 0.633 1.792 1.792 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.015 22.174 22.178 lib_test 1 2.0 0.000 0.000 22.134 22.154 dbcsr_run_tests 3 3.0 0.000 0.001 22.133 22.152 test_multiplies_multiproc 3 4.0 0.000 0.003 21.167 21.211 dbcsr_multiply_generic 9 5.0 0.001 0.001 19.580 19.675 multiply_cannon 9 6.0 0.002 0.002 17.226 17.844 multiply_cannon_loop 9 7.0 0.002 0.002 16.888 17.510 multiply_cannon_multrec 72 8.0 12.920 13.331 12.921 13.331 mp_waitall_1 576 9.2 4.375 5.133 4.375 5.133 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 3.842 4.693 mp_sum_l 470 2.5 0.904 1.532 0.904 1.532 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.882 1.509 dbcsr_make_random_matrix 9 4.0 0.762 0.778 0.926 0.956 make_m2s 18 6.0 0.001 0.001 0.867 0.904 make_images 18 7.0 0.024 0.031 0.863 0.901 dbcsr_finalize 27 5.7 0.000 0.000 0.644 0.733 dbcsr_data_release 444 7.6 0.611 0.711 0.611 0.711 dbcsr_redistribute 9 5.0 0.274 0.316 0.652 0.681 dbcsr_destroy 111 5.9 0.003 0.066 0.519 0.617 dbcsr_merge_all 18 6.5 0.098 0.115 0.548 0.615 make_images_data 18 8.0 0.001 0.001 0.465 0.539 hybrid_alltoall_any 18 9.0 0.048 0.208 0.409 0.471 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.119 0.458 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=7.138000000000005, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=42.666, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=13.196, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.908, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.921, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.258, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=2.229999999999997, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.274, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=12.92, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.762, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.611, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.098, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.904, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.375, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.055 0.055 143.800 143.800 qs_mol_dyn_low 1 2.0 0.004 0.004 142.257 142.257 velocity_verlet 5 3.0 0.004 0.004 114.787 114.787 qmmm_el_coupling 6 3.8 0.000 0.000 87.884 87.884 qmmm_elec_with_gaussian 6 4.8 0.033 0.033 87.874 87.874 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 86.881 86.881 qmmm_elec_gaussian_low_G 6 6.8 85.930 85.930 85.930 85.930 qs_forces 6 3.8 0.001 0.001 44.530 44.530 qs_energies 6 4.8 0.000 0.000 39.615 39.615 scf_env_do_scf 6 5.8 0.001 0.001 36.933 36.933 rebuild_ks_matrix 45 8.4 0.000 0.000 32.360 32.360 qs_ks_build_kohn_sham_matrix 45 9.4 0.006 0.006 32.360 32.360 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 32.250 32.250 qs_ks_update_qs_env 45 7.8 0.000 0.000 27.917 27.917 pw_transfer 966 12.3 0.062 0.062 19.362 19.362 fft_wrap_pw1pw2 801 13.6 0.007 0.007 19.108 19.108 fft_wrap_pw1pw2_150 507 15.2 2.879 2.879 18.677 18.677 qs_vxc_create 45 10.4 0.001 0.001 16.315 16.315 xc_vxc_pw_create 45 11.4 0.794 0.794 16.314 16.314 xc_pw_derive 270 13.4 0.002 0.002 10.652 10.652 xc_rho_set_and_dset_create 45 12.4 1.289 1.289 8.870 8.870 fft3d_s 802 15.6 8.383 8.383 8.394 8.394 qs_rho_update_rho_low 45 7.9 0.000 0.000 8.191 8.191 calculate_rho_elec 45 8.9 0.658 0.658 8.191 8.191 pw_integral_ab 2539 7.4 7.720 7.720 7.720 7.720 xc_pw_divergence 45 12.4 0.001 0.001 6.545 6.545 qmmm_forces 6 3.8 0.003 0.003 6.442 6.442 pw_scatter_s 429 15.8 6.358 6.358 6.358 6.358 qmmm_forces_with_gaussian 6 4.8 0.040 0.040 6.035 6.035 qs_ks_ddapc 45 10.4 0.001 0.001 5.647 5.647 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.895 4.895 init_scf_loop 6 6.8 0.000 0.000 4.677 4.677 sum_up_and_integrate 45 10.4 0.995 0.995 4.531 4.531 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 4.462 4.462 qmmm_forces_gaussian_low_G 6 6.8 4.057 4.057 4.057 4.057 pw_poisson_solve 51 9.9 0.901 0.901 3.920 3.920 cp_ddapc_apply_CD 45 11.4 0.007 0.007 3.822 3.822 grid_collocate_task_list 45 9.9 3.772 3.772 3.772 3.772 density_rs2pw 45 9.9 0.002 0.002 3.761 3.761 integrate_v_rspace 45 11.4 0.008 0.008 3.536 3.536 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.045 72.923 72.934 qs_mol_dyn_low 1 2.0 0.003 0.005 71.664 71.750 qs_forces 6 3.8 0.001 0.001 53.267 53.268 qs_energies 6 4.8 0.000 0.000 50.817 50.818 scf_env_do_scf 6 5.8 0.000 0.001 49.587 49.587 scf_env_do_scf_inner_loop 113 6.2 0.003 0.020 47.492 47.493 rebuild_ks_matrix 119 8.1 0.000 0.000 35.688 35.700 qs_ks_build_kohn_sham_matrix 119 9.1 0.018 0.025 35.688 35.700 qs_ks_update_qs_env 119 7.3 0.001 0.001 33.611 33.623 velocity_verlet 5 3.0 0.002 0.005 30.424 30.427 pw_transfer 2446 12.3 0.182 0.196 25.776 26.383 fft_wrap_pw1pw2 2059 13.4 0.027 0.031 25.281 25.886 fft_wrap_pw1pw2_150 1321 14.9 4.644 5.095 24.433 25.037 qs_vxc_create 119 10.1 0.002 0.003 19.471 19.475 xc_vxc_pw_create 119 11.1 0.180 0.241 19.468 19.473 fft3d_ps 2059 15.4 9.478 10.271 16.336 17.288 xc_pw_derive 714 13.1 0.009 0.010 15.037 15.328 qs_rho_update_rho_low 119 7.3 0.001 0.001 13.515 13.523 calculate_rho_elec 119 8.3 0.055 0.064 13.514 13.522 sum_up_and_integrate 119 10.1 0.094 0.111 11.584 11.614 integrate_v_rspace 119 11.1 0.004 0.005 11.490 11.528 xc_pw_divergence 119 12.1 0.004 0.005 10.003 10.232 xc_rho_set_and_dset_create 119 12.1 0.409 0.519 9.006 9.259 qmmm_forces 6 3.8 0.002 0.003 9.243 9.244 qmmm_forces_with_gaussian 6 4.8 0.009 0.011 8.910 9.039 density_rs2pw 119 9.3 0.007 0.008 8.149 8.522 rs_pw_transfer 988 11.5 0.015 0.016 7.929 8.304 qmmm_el_coupling 6 3.8 0.000 0.000 8.135 8.193 qmmm_elec_with_gaussian 6 4.8 0.003 0.005 8.134 8.191 potential_pw2rs 119 12.1 0.007 0.009 7.021 7.039 mp_alltoall_z22v 2059 17.4 5.171 6.146 5.171 6.146 grid_collocate_task_list 119 9.3 5.154 5.514 5.154 5.514 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.693 4.744 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 4.015 4.510 grid_integrate_task_list 119 12.1 4.129 4.357 4.129 4.357 x_to_yz 1095 16.8 0.940 1.041 3.680 4.035 qmmm_forces_gaussian_low_G 6 6.8 3.859 3.908 3.859 3.908 qmmm_elec_gaussian_low_G 6 6.8 3.333 3.821 3.333 3.821 yz_to_x 964 16.0 0.684 0.847 3.115 3.775 mp_waitany 4028 12.8 3.136 3.718 3.136 3.718 pw_restrict_s3 18 5.8 1.681 1.710 3.468 3.575 rs_pw_transfer_PW2RS_150 125 13.9 1.117 1.183 3.151 3.185 qs_scf_new_mos 113 7.2 0.001 0.002 2.853 2.864 qs_scf_loop_do_ot 113 8.2 0.001 0.001 2.852 2.864 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.778 2.842 pw_prolongate_s3 18 6.8 1.388 1.418 2.778 2.842 qs_ks_ddapc 119 10.1 0.003 0.003 2.746 2.823 pw_gather_p 964 15.0 2.431 2.788 2.431 2.788 ot_scf_mini 113 9.2 0.001 0.002 2.751 2.765 rs_pw_transfer_RS2PW_150 125 11.2 0.780 0.882 2.343 2.740 dbcsr_multiply_generic 2588 12.3 0.068 0.072 2.426 2.458 pw_integral_ab 2761 7.7 1.671 1.739 2.055 2.176 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.117 2.118 init_scf_loop 6 6.8 0.000 0.000 2.091 2.091 mp_sum_dm3 33 5.7 1.939 1.994 1.939 1.994 pw_scatter_p 1095 15.8 1.780 1.817 1.780 1.817 ot_mini 113 10.2 0.001 0.001 1.539 1.550 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=24.700999999999993, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=85.93, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.383, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=7.72, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=6.358, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.057, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.772, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=2.879, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=35.484, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=3.333, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.671, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.859, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.154, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=4.644, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=4.129, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=9.478, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.171, yerr=0.0 Summary: Performance test took 37 minutes. Status: OK Removing intermediate container 4ab3adb5be1c ---> b49377cf3bbf Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in fa7dbc2ab8ef Removing intermediate container fa7dbc2ab8ef ---> 55e4390eeca6 Step 42/42 : ENTRYPOINT [] ---> Running in d3743d011db8 Removing intermediate container d3743d011db8 ---> 05476fa05813 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 05476fa05813 Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2023-07-19 15:49:50+00:00