StartDate: 2023-03-24 19:25:07+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: dc34ec903ce29fa3e138f690b3e7fbdf40732b94 CommitTime: 2023-03-24 16:55:38 +0100 CommitAuthor: Ole Schuett CommitSubject: Add generate_apptainer_def_files.py to test_python.sh Populating docker build cache... done. #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=dc34ec903ce29fa3e138f690b3e7fbdf40732b94 DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 367.8MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 2ab09b027e7f: Pulling fs layer 2ab09b027e7f: Verifying Checksum 2ab09b027e7f: Download complete 2ab09b027e7f: Pull complete Digest: sha256:67211c14fa74f070d27cc59d69a7fa9aeff8e28ea118ef3babc295a0428a6d21 Status: Downloaded newer image for ubuntu:22.04 ---> 08d22c0ceb15 Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> 47a3542ef7e0 Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> e86e12710088 Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 5ad7a46db6e4 Step 5/42 : RUN mkdir scripts ---> Using cache ---> 2ed50184e1de Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 2cfaec0caa31 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> e3c7243c9e7e Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> f053587e71f2 Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 27b6a69300c0 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> af05dc2abbcf Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 72a9160365c6 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> 08767ccb9543 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> c93c73c59c7d Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> 46507f029160 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> a3d4507934f4 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 685c0a7bb753 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> b6060b066447 Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 8aaf7d16b2e3 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> 828b2697b61f Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 2f8752bf29da Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> cdd2991d30c3 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> de789914448a Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> 102898d8e707 Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 055fbb9b34a4 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> 701bcf41a83b Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 59acf763e116 Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> da2c17ca82a1 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> 89c7d0cefba2 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> ff60209d462a Step 30/42 : COPY ./Makefile . ---> Using cache ---> 6494f2b0a1b0 Step 31/42 : COPY ./src ./src ---> Using cache ---> ed5053988fb0 Step 32/42 : COPY ./exts ./exts ---> Using cache ---> 768736c3f838 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> baee2d5b7cb3 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && rm -rf lib obj && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in 91a18d08b521 './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container 91a18d08b521 ---> 0153b5435cbd Step 35/42 : COPY ./data ./data ---> 462c4637db8f Step 36/42 : COPY ./tests ./tests ---> 2be8c51d24d3 Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> 18b6f48d5806 Step 38/42 : COPY ./benchmarks ./benchmarks ---> 96afa28924ce Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 68d20c83ddd4 Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in c6d52bb8feaf ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 70 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.035 0.035 99.124 99.124 qs_mol_dyn_low 1 2.0 0.003 0.003 98.456 98.456 qs_forces 11 3.9 0.002 0.002 98.414 98.414 qs_energies 11 4.9 0.001 0.001 91.349 91.349 scf_env_do_scf 11 5.9 0.001 0.001 78.524 78.524 velocity_verlet 10 3.0 0.002 0.002 66.368 66.368 scf_env_do_scf_inner_loop 108 6.5 0.011 0.011 60.315 60.315 rebuild_ks_matrix 119 8.3 0.001 0.001 24.076 24.076 qs_ks_build_kohn_sham_matrix 119 9.3 0.013 0.013 24.075 24.075 qs_ks_update_qs_env 119 7.6 0.001 0.001 23.473 23.473 qs_scf_new_mos 108 7.5 0.001 0.001 21.906 21.906 qs_scf_loop_do_ot 108 8.5 0.001 0.001 21.905 21.905 dbcsr_multiply_generic 2286 12.5 0.166 0.166 21.278 21.278 ot_scf_mini 108 9.5 0.002 0.002 20.349 20.349 qs_rho_update_rho_low 119 7.7 0.001 0.001 19.398 19.398 calculate_rho_elec 119 8.7 1.073 1.073 19.397 19.397 init_scf_loop 11 6.9 0.000 0.000 17.988 17.988 grid_collocate_task_list 119 9.7 14.891 14.891 14.891 14.891 prepare_preconditioner 11 7.9 0.000 0.000 14.660 14.660 make_preconditioner 11 8.9 0.000 0.000 14.660 14.660 sum_up_and_integrate 119 10.3 1.424 1.424 14.325 14.325 ot_mini 108 10.5 0.001 0.001 13.456 13.456 make_full_inverse_cholesky 11 9.9 0.000 0.000 13.027 13.027 integrate_v_rspace 119 11.3 0.113 0.113 12.901 12.901 make_m2s 4572 13.5 0.045 0.045 11.320 11.320 grid_integrate_task_list 119 12.3 10.639 10.639 10.639 10.639 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.437 7.437 ot_diis_step 108 11.5 0.004 0.004 6.846 6.846 qs_ot_get_derivative 108 11.5 0.001 0.001 6.606 6.606 pw_transfer 1439 11.6 0.068 0.068 6.055 6.055 multiply_cannon 2286 13.5 0.190 0.190 5.992 5.992 make_images 4572 14.5 2.493 2.493 5.988 5.988 fft_wrap_pw1pw2 1201 12.6 0.007 0.007 5.822 5.822 dbcsr_make_dense_low 5837 15.5 0.084 0.084 5.730 5.730 make_dense_data 5837 16.5 5.040 5.040 5.632 5.632 multiply_cannon_loop 2286 14.5 0.050 0.050 5.516 5.516 dbcsr_complete_redistribute 329 12.2 3.052 3.052 5.508 5.508 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.471 5.471 apply_single 119 13.6 0.000 0.000 5.471 5.471 multiply_cannon_multrec 2286 15.5 5.411 5.411 5.465 5.465 fft_wrap_pw1pw2_140 487 13.2 0.459 0.459 5.006 5.006 dbcsr_make_images_dense 3978 14.8 0.016 0.016 4.954 4.954 cp_fm_cholesky_decompose 22 10.9 4.868 4.868 4.868 4.868 init_scf_run 11 5.9 0.002 0.002 4.532 4.532 scf_env_initial_rho_setup 11 6.9 0.001 0.001 4.530 4.530 copy_dbcsr_to_fm 153 11.3 0.002 0.002 4.324 4.324 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.262 4.262 wfi_extrapolate 11 7.9 0.001 0.001 4.016 4.016 dbcsr_dot_sd 1205 11.9 3.946 3.946 3.948 3.948 qs_create_task_list 11 7.9 0.000 0.000 3.828 3.828 generate_qs_task_list 11 8.9 2.286 2.286 3.828 3.828 pw_poisson_solve 119 10.3 1.001 1.001 3.802 3.802 dbcsr_copy 2102 12.0 0.283 0.283 3.790 3.790 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.629 3.629 dbcsr_copy_into_existing 22 7.9 3.470 3.470 3.470 3.470 density_rs2pw 119 9.7 0.005 0.005 3.433 3.433 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.433 3.433 qs_ot_get_p 119 10.4 0.001 0.001 3.250 3.250 cp_fm_cholesky_invert 11 10.9 3.167 3.167 3.167 3.167 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 3.101 3.101 evaluate_core_matrix_traces 119 8.3 0.001 0.001 2.829 2.829 calculate_ptrace_kp 238 9.3 0.001 0.001 2.828 2.828 fft3d_s 1202 14.6 2.759 2.759 2.763 2.763 copy_fm_to_dbcsr 176 11.2 0.001 0.001 2.709 2.709 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.678 2.678 pw_integral_ab 119 11.3 2.614 2.614 2.614 2.614 dbcsr_finalize 5186 13.8 0.208 0.208 2.532 2.532 dbcsr_data_release 279534 16.0 2.465 2.465 2.465 2.465 potential_pw2rs 119 12.3 0.113 0.113 2.149 2.149 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 2.094 2.094 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 2.094 2.094 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 2.021 2.021 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.023 73.943 73.952 qs_mol_dyn_low 1 2.0 0.003 0.004 73.812 73.816 qs_forces 11 3.9 0.002 0.003 73.758 73.759 qs_energies 11 4.9 0.001 0.001 69.237 69.247 scf_env_do_scf 11 5.9 0.001 0.003 63.613 63.616 scf_env_do_scf_inner_loop 108 6.5 0.004 0.032 58.358 58.359 velocity_verlet 10 3.0 0.002 0.003 46.012 46.013 rebuild_ks_matrix 119 8.3 0.001 0.002 26.367 26.439 qs_ks_build_kohn_sham_matrix 119 9.3 0.022 0.024 26.367 26.438 qs_ks_update_qs_env 119 7.6 0.002 0.002 23.296 23.349 qs_rho_update_rho_low 119 7.7 0.001 0.001 22.057 22.123 calculate_rho_elec 119 8.7 0.032 0.038 22.056 22.122 dbcsr_multiply_generic 2286 12.5 0.105 0.117 20.838 21.240 sum_up_and_integrate 119 10.3 0.050 0.060 20.202 20.291 integrate_v_rspace 119 11.3 0.006 0.006 20.152 20.248 qs_scf_new_mos 108 7.5 0.001 0.001 17.686 17.731 qs_scf_loop_do_ot 108 8.5 0.001 0.001 17.685 17.730 ot_scf_mini 108 9.5 0.003 0.003 16.542 16.598 multiply_cannon 2286 13.5 0.177 0.200 13.650 14.210 multiply_cannon_loop 2286 14.5 0.124 0.142 12.392 12.721 grid_collocate_task_list 119 9.7 10.576 11.644 10.576 11.644 mp_waitall_1 158411 16.6 11.068 11.630 11.068 11.630 rs_pw_transfer 974 11.9 0.016 0.018 10.838 11.525 density_rs2pw 119 9.7 0.007 0.008 10.416 11.105 grid_integrate_task_list 119 12.3 9.515 10.182 9.515 10.182 ot_mini 108 10.5 0.001 0.001 9.982 10.058 multiply_cannon_metrocomm3 18288 15.5 0.043 0.048 7.447 8.159 mp_waitany 9880 13.7 6.108 6.810 6.108 6.810 pw_transfer 1439 11.6 0.101 0.111 6.708 6.759 fft_wrap_pw1pw2 1201 12.6 0.012 0.014 6.506 6.561 rs_pw_transfer_RS2PW_140 130 11.5 0.252 0.277 5.593 6.303 mp_alltoall_d11v 2130 13.8 5.396 6.004 5.396 6.004 qs_ot_get_derivative 108 11.5 0.001 0.001 5.536 5.595 potential_pw2rs 119 12.3 0.007 0.008 5.567 5.582 rs_gather_matrices 119 12.3 0.160 0.173 4.991 5.572 init_scf_loop 11 6.9 0.000 0.000 5.228 5.229 fft_wrap_pw1pw2_140 487 13.2 0.963 1.037 5.026 5.139 fft3d_ps 1201 14.6 1.737 1.852 4.561 4.679 ot_diis_step 108 11.5 0.005 0.006 4.309 4.310 init_scf_run 11 5.9 0.000 0.005 4.201 4.202 scf_env_initial_rho_setup 11 6.9 0.000 0.004 4.201 4.202 make_m2s 4572 13.5 0.058 0.065 3.878 3.991 wfi_extrapolate 11 7.9 0.001 0.001 3.967 3.967 multiply_cannon_multrec 18288 15.5 3.609 3.884 3.624 3.901 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.786 3.878 apply_single 119 13.6 0.001 0.001 3.786 3.878 make_images 4572 14.5 0.138 0.146 3.459 3.568 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.278 3.300 mp_sum_l 11218 13.2 2.677 3.005 2.677 3.005 qs_ot_get_p 119 10.4 0.001 0.001 2.491 2.587 mp_alltoall_z22v 1201 16.6 2.427 2.582 2.427 2.582 mp_sum_d 4135 12.0 2.002 2.312 2.002 2.312 prepare_preconditioner 11 7.9 0.000 0.000 2.083 2.109 make_preconditioner 11 8.9 0.000 0.000 2.083 2.109 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 1.900 1.949 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.889 1.920 make_images_data 4572 15.5 0.040 0.047 1.517 1.712 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.650 1.706 rs_pw_transfer_PW2RS_140 130 13.9 0.453 0.479 1.631 1.663 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.002 1.488 1.494 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=58.27499999999999, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=14.891, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.639, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=5.411, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.04, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.868, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=27.671, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=10.576, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=9.515, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.609, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=6.108, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=5.396, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=11.068, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.029 0.029 115.124 115.124 qs_mol_dyn_low 1 2.0 0.002 0.002 114.454 114.454 qs_forces 11 3.9 0.001 0.001 114.413 114.413 qs_energies 11 4.9 0.001 0.001 106.387 106.387 scf_env_do_scf 11 5.9 0.001 0.001 93.358 93.358 velocity_verlet 10 3.0 0.002 0.002 74.803 74.803 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 74.268 74.268 rebuild_ks_matrix 107 8.3 0.001 0.001 34.319 34.319 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 34.318 34.318 qs_rho_update_rho_low 107 7.7 0.001 0.001 32.469 32.469 calculate_rho_elec 107 8.7 0.860 0.860 32.469 32.469 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.759 30.759 grid_collocate_task_list 107 9.7 28.207 28.207 28.207 28.207 sum_up_and_integrate 107 10.3 0.753 0.753 27.487 27.487 integrate_v_rspace 107 11.3 0.085 0.085 26.735 26.735 grid_integrate_task_list 107 12.3 24.836 24.836 24.836 24.836 init_scf_loop 11 6.9 0.000 0.000 18.916 18.916 dbcsr_multiply_generic 1966 12.4 0.138 0.138 18.555 18.555 qs_scf_new_mos 96 7.5 0.001 0.001 18.037 18.037 qs_scf_loop_do_ot 96 8.5 0.001 0.001 18.036 18.036 ot_scf_mini 96 9.5 0.002 0.002 16.821 16.821 prepare_preconditioner 11 7.9 0.000 0.000 14.717 14.717 make_preconditioner 11 8.9 0.000 0.000 14.717 14.717 make_full_inverse_cholesky 11 9.9 0.000 0.000 13.517 13.517 ot_mini 96 10.5 0.001 0.001 11.051 11.051 make_m2s 3932 13.4 0.042 0.042 9.997 9.997 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.826 6.826 pw_transfer 1295 11.6 0.062 0.062 5.925 5.925 qs_ot_get_derivative 96 11.5 0.001 0.001 5.719 5.719 fft_wrap_pw1pw2 1081 12.6 0.006 0.006 5.708 5.708 init_scf_run 11 5.9 0.002 0.002 5.521 5.521 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.519 5.519 dbcsr_make_dense_low 4961 15.5 0.075 0.075 5.416 5.416 make_dense_data 4961 16.5 4.778 4.778 5.330 5.330 ot_diis_step 96 11.5 0.003 0.003 5.329 5.329 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.029 5.029 apply_single 107 13.6 0.000 0.000 5.028 5.028 fft_wrap_pw1pw2_140 439 13.2 0.550 0.550 4.962 4.962 wfi_extrapolate 11 7.9 0.001 0.001 4.921 4.921 make_images 3932 14.4 1.950 1.950 4.907 4.907 cp_fm_cholesky_decompose 22 10.9 4.902 4.902 4.902 4.902 multiply_cannon 1966 13.4 0.205 0.205 4.817 4.817 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.772 4.772 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.451 4.451 cp_fm_cholesky_invert 11 10.9 4.335 4.335 4.335 4.335 multiply_cannon_loop 1966 14.4 0.059 0.059 4.283 4.283 multiply_cannon_multrec 1966 15.4 4.177 4.177 4.223 4.223 dbcsr_complete_redistribute 317 12.2 1.958 1.958 3.857 3.857 dbcsr_copy 1855 11.9 0.225 0.225 3.759 3.759 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.734 3.734 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.573 3.573 dbcsr_copy_into_existing 22 7.9 3.499 3.499 3.499 3.499 density_rs2pw 107 9.7 0.005 0.005 3.401 3.401 qs_create_task_list 11 7.9 0.000 0.000 3.330 3.330 generate_qs_task_list 11 8.9 2.393 2.393 3.330 3.330 copy_dbcsr_to_fm 147 11.2 0.002 0.002 3.243 3.243 fft3d_s 1082 14.6 2.789 2.789 2.793 2.793 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.658 2.658 qs_ot_get_p 107 10.4 0.001 0.001 2.598 2.598 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.495 2.495 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.028 70.549 70.559 qs_mol_dyn_low 1 2.0 0.003 0.004 70.427 70.431 qs_forces 11 3.9 0.002 0.002 70.387 70.387 qs_energies 11 4.9 0.001 0.001 65.672 65.674 scf_env_do_scf 11 5.9 0.000 0.002 60.823 60.824 scf_env_do_scf_inner_loop 96 6.5 0.002 0.017 56.265 56.265 velocity_verlet 10 3.0 0.001 0.003 42.023 42.024 rebuild_ks_matrix 107 8.3 0.000 0.000 30.541 30.654 qs_ks_build_kohn_sham_matrix 107 9.3 0.013 0.021 30.540 30.653 qs_ks_update_qs_env 107 7.6 0.001 0.001 26.918 27.014 sum_up_and_integrate 107 10.3 0.033 0.037 26.753 26.777 integrate_v_rspace 107 11.3 0.004 0.005 26.719 26.747 qs_rho_update_rho_low 107 7.7 0.000 0.001 26.040 26.052 calculate_rho_elec 107 8.7 0.027 0.028 26.039 26.051 grid_integrate_task_list 107 12.3 22.791 23.176 22.791 23.176 grid_collocate_task_list 107 9.7 21.796 22.183 21.796 22.183 dbcsr_multiply_generic 1966 12.4 0.063 0.066 11.540 11.665 qs_scf_new_mos 96 7.5 0.000 0.001 9.037 9.129 qs_scf_loop_do_ot 96 8.5 0.001 0.001 9.037 9.129 multiply_cannon 1966 13.4 0.116 0.127 8.550 8.779 ot_scf_mini 96 9.5 0.002 0.002 8.475 8.572 multiply_cannon_loop 1966 14.4 0.077 0.090 8.038 8.286 mp_waitall_1 136719 16.5 6.547 7.227 6.547 7.227 multiply_cannon_metrocomm3 15728 15.4 0.030 0.034 4.630 5.355 ot_mini 96 10.5 0.001 0.001 5.002 5.105 rs_pw_transfer 878 11.9 0.009 0.011 4.054 4.643 init_scf_loop 11 6.9 0.000 0.000 4.546 4.546 density_rs2pw 107 9.7 0.004 0.005 3.705 4.331 init_scf_run 11 5.9 0.000 0.004 3.816 3.816 scf_env_initial_rho_setup 11 6.9 0.000 0.003 3.816 3.816 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.747 3.765 pw_transfer 1295 11.6 0.069 0.076 3.636 3.732 fft_wrap_pw1pw2 1081 12.6 0.007 0.008 3.508 3.591 wfi_extrapolate 11 7.9 0.001 0.001 3.479 3.480 fft_wrap_pw1pw2_140 439 13.2 0.575 0.708 3.000 3.174 potential_pw2rs 107 12.3 0.005 0.006 2.981 3.004 multiply_cannon_multrec 15728 15.4 2.689 2.959 2.697 2.969 qs_ot_get_derivative 96 11.5 0.001 0.001 2.510 2.607 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.398 2.475 apply_single 107 13.6 0.000 0.000 2.398 2.474 ot_diis_step 96 11.5 0.003 0.003 2.472 2.472 fft3d_ps 1081 14.6 1.097 1.275 2.149 2.362 make_m2s 3932 13.4 0.039 0.041 1.966 2.038 mp_waitany 8968 13.7 1.409 1.951 1.409 1.951 make_images 3932 14.4 0.102 0.105 1.680 1.747 mp_alltoall_d11v 1998 13.7 1.089 1.730 1.089 1.730 rs_pw_transfer_RS2PW_140 118 11.5 0.181 0.203 1.093 1.680 rs_gather_matrices 107 12.3 0.098 0.108 0.891 1.549 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=43.88900000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=28.207, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.836, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.902, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.778, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=4.335, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.177, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=15.317000000000007, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.796, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.791, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_invert", label="cp_fm_cholesky_invert", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=2.689, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.409, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=6.547, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.176 0.176 107.224 107.224 qs_energies 1 2.0 0.000 0.000 106.390 106.390 scf_env_do_scf 1 3.0 0.000 0.000 105.164 105.164 qs_ks_update_qs_env 8 5.0 0.000 0.000 99.937 99.937 rebuild_ks_matrix 7 6.0 0.000 0.000 99.881 99.881 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 99.881 99.881 hfx_ks_matrix 7 8.0 0.000 0.000 90.582 90.582 integrate_four_center 7 9.0 1.646 1.646 90.538 90.538 integrate_four_center_main 7 10.0 0.308 0.308 82.828 82.828 integrate_four_center_bin 452 11.0 82.520 82.520 82.520 82.520 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 57.181 57.181 init_scf_loop 1 4.0 0.000 0.000 47.968 47.968 integrate_four_center_load 7 10.0 0.000 0.000 5.817 5.817 hfx_load_balance 1 11.0 0.001 0.001 5.816 5.816 qs_vxc_create 14 8.0 0.000 0.000 3.344 3.344 xc_vxc_pw_create 14 9.0 0.159 0.159 3.344 3.344 hfx_load_balance_bin 1 12.0 2.931 2.931 2.931 2.931 hfx_load_balance_count 1 12.0 2.868 2.868 2.868 2.868 xc_rho_set_and_dset_create 14 10.0 0.116 0.116 2.519 2.519 calculate_rho_elec 15 7.4 0.118 0.118 2.511 2.511 prepare_preconditioner 1 5.0 0.000 0.000 2.464 2.464 make_preconditioner 1 6.0 0.000 0.000 2.464 2.464 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 2.303 2.303 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.222 0.243 107.056 107.067 qs_energies 1 2.0 0.000 0.000 106.644 106.651 scf_env_do_scf 1 3.0 0.000 0.000 106.272 106.273 qs_ks_update_qs_env 8 5.0 0.000 0.000 103.821 103.822 rebuild_ks_matrix 7 6.0 0.000 0.000 103.811 103.812 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 103.811 103.812 hfx_ks_matrix 7 8.0 0.000 0.000 97.469 97.470 integrate_four_center 7 9.0 0.059 0.352 97.459 97.460 integrate_four_center_main 7 10.0 0.004 0.004 86.370 89.622 integrate_four_center_bin 448 11.0 86.366 89.617 86.366 89.617 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 60.393 60.393 init_scf_loop 1 4.0 0.000 0.000 45.878 45.879 mp_sync 56 11.2 4.500 6.216 4.500 6.216 integrate_four_center_load 7 10.0 0.000 0.000 5.875 5.875 hfx_load_balance 1 11.0 0.001 0.001 5.874 5.875 hfx_load_balance_count 1 12.0 2.884 2.957 2.884 2.957 hfx_load_balance_bin 1 12.0 2.837 2.914 2.837 2.914 qs_vxc_create 14 8.0 0.000 0.001 2.739 2.739 xc_vxc_pw_create 14 9.0 0.008 0.009 2.738 2.738 xc_rho_set_and_dset_create 14 10.0 0.013 0.015 2.069 2.183 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=16.775000000000006, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=82.52, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.931, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.868, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.646, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.308, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.176, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=10.183999999999997, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=86.366, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.837, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.884, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.059, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.004, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.222, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=4.5, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.013 0.013 80.446 80.446 qs_energies 1 2.0 0.000 0.000 80.057 80.057 mp2_main 1 3.0 0.000 0.000 77.247 77.247 mp2_gpw_main 1 4.0 0.000 0.000 77.145 77.145 rpa_ri_compute_en 1 5.0 0.000 0.000 73.446 73.446 rpa_num_int 1 6.0 0.001 0.001 73.440 73.440 compute_mat_P_omega 1 7.0 0.003 0.003 63.512 63.512 compute_mat_P_omega_contract 10 8.0 8.965 8.965 63.312 63.312 dbt_total 2336 9.6 0.013 0.013 49.325 49.325 dbt_contract 787 11.0 0.039 0.039 42.403 42.403 dbt_tas_total 1149 12.2 0.203 0.203 41.386 41.386 dbt_tas_multiply 807 12.1 0.002 0.002 40.104 40.104 dbt_tas_dbm 807 14.1 0.003 0.003 33.906 33.906 dbm_multiply 807 16.1 33.898 33.898 33.898 33.898 dbt_tas_mm_1N 524 15.1 0.001 0.001 26.843 26.843 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 25.377 25.377 compute_mat_P_omega_calc_M_occ 250 9.0 8.993 8.993 17.168 17.168 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 6.332 6.332 dbt_copy 1103 10.7 0.103 0.103 5.623 5.623 dbt_tas_mm_2 251 15.0 0.001 0.001 5.556 5.556 compute_QP_energies 1 7.0 0.000 0.000 4.924 4.924 compute_self_energy_cubic_gw 1 8.0 0.053 0.053 4.923 4.923 contract_cubic_gw 21 9.0 0.000 0.000 3.896 3.896 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.692 3.692 dbt_tas_reserve_blocks_index 3261 14.3 0.480 0.480 3.633 3.633 dbm_reserve_blocks 3628 15.3 3.222 3.222 3.222 3.222 dbt_reserve_blocks_index 2280 13.1 0.052 0.052 2.717 2.717 scf_env_do_scf 1 3.0 0.000 0.000 2.677 2.677 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.677 2.677 dbt_reserve_blocks_index_array 2222 12.2 0.008 0.008 2.671 2.671 dbt_tas_copy 574 11.4 1.314 1.314 2.251 2.251 dbt_crop 1042 12.0 1.325 1.325 2.182 2.182 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.111 2.111 convert_to_new_pgrid 2421 14.1 0.072 0.072 2.107 2.107 dbm_copy 1614 15.1 2.035 2.035 2.035 2.035 compute_W_cubic_GW 10 7.0 0.007 0.007 2.030 2.030 dbt_tas_reshape 367 15.0 0.019 0.019 2.012 2.012 dbt_reshape 278 11.9 1.022 1.022 1.873 1.873 get_2c_integrals 1 6.0 0.000 0.000 1.782 1.782 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.024 31.494 31.503 qs_energies 1 2.0 0.000 0.000 31.369 31.370 mp2_main 1 3.0 0.000 0.000 30.447 30.449 mp2_gpw_main 1 4.0 0.000 0.001 30.416 30.418 rpa_ri_compute_en 1 5.0 0.000 0.000 29.156 29.157 rpa_num_int 1 6.0 0.000 0.002 29.155 29.157 dbt_total 2336 9.6 0.015 0.016 25.961 25.962 compute_mat_P_omega 1 7.0 0.001 0.005 24.952 24.983 compute_mat_P_omega_contract 10 8.0 0.423 0.439 24.830 24.833 dbt_contract 787 11.0 0.037 0.039 18.919 18.922 dbt_tas_total 1149 12.2 0.075 0.082 16.701 16.702 dbt_tas_multiply 807 12.1 0.002 0.002 16.645 16.647 dbt_tas_dbm 807 14.1 0.003 0.003 12.548 12.552 dbm_multiply 807 16.1 9.997 10.812 9.997 10.812 compute_mat_P_omega_calc_M_occ 250 9.0 0.399 0.413 7.351 7.351 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 7.128 7.128 dbt_copy 1149 10.8 0.013 0.013 6.292 6.434 dbt_reshape 1136 11.8 2.736 2.934 6.009 6.152 dbt_tas_mm_2 251 15.0 0.001 0.001 5.921 5.923 mp_sync 8688 11.6 4.153 5.231 4.153 5.231 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.141 5.141 dbt_tas_mm_1N 524 15.1 0.001 0.003 4.540 4.934 compute_QP_energies 1 7.0 0.000 0.000 2.677 2.678 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 2.675 2.677 dbt_communicate_buffer 1136 12.8 0.052 0.056 2.425 2.544 mp_waitall_2 3812 15.3 2.376 2.525 2.376 2.525 contract_cubic_gw 21 9.0 0.000 0.000 2.094 2.094 dbt_reserve_blocks_index 2887 13.1 0.067 0.072 1.781 1.932 dbt_reserve_blocks_index_array 2829 12.2 0.009 0.010 1.772 1.923 dbt_tas_reserve_blocks_index 3347 14.5 0.435 0.469 1.765 1.915 dbt_crop 1042 12.0 0.898 0.993 1.431 1.576 dbm_reserve_blocks 3752 15.4 1.424 1.555 1.424 1.555 mp2_ri_gpw_compute_in 1 5.0 0.000 0.001 1.258 1.259 dbt_tas_replicate 405 14.1 0.548 0.707 1.079 1.136 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.081 1.084 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.023 1.026 convert_to_new_pgrid 2421 14.1 0.026 0.029 0.820 0.922 parallel_gemm_fm 105 8.4 0.000 0.000 0.884 0.893 parallel_gemm_fm_cosma 105 9.4 0.884 0.893 0.884 0.893 dbm_copy 1608 15.1 0.789 0.889 0.789 0.889 scf_env_do_scf 1 3.0 0.000 0.000 0.885 0.885 scf_env_do_scf_inner_loop 17 4.0 0.000 0.003 0.885 0.885 mp_max_i 2000 9.8 0.542 0.710 0.542 0.710 compute_W_cubic_GW 10 7.0 0.001 0.001 0.675 0.680 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=22.310999999999986, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=33.898, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.993, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.965, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.222, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.035, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.022, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=9.197, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=9.997, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.399, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.423, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.424, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.789, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.736, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=4.153, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.376, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.018 283.751 283.751 qs_forces 1 2.0 0.000 0.000 283.188 283.188 rebuild_ks_matrix 7 6.6 0.000 0.000 281.689 281.689 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 281.689 281.689 hfx_ks_matrix 7 8.6 0.000 0.000 279.777 279.777 hfx_ri_update_ks 7 9.6 0.000 0.000 243.368 243.368 hfx_ri_update_ks_Pmat 7 10.6 36.596 36.596 243.363 243.363 dbt_total 783 11.1 0.006 0.006 220.085 220.085 qs_energies 1 3.0 0.000 0.000 213.022 213.022 scf_env_do_scf 1 4.0 0.000 0.000 212.719 212.719 qs_ks_update_qs_env 8 6.0 0.000 0.000 211.578 211.578 dbt_contract 207 12.4 0.053 0.053 200.478 200.478 dbt_tas_total 317 14.0 1.618 1.618 198.120 198.120 dbt_tas_multiply 216 13.5 0.001 0.001 194.931 194.931 dbt_tas_dbm 216 15.5 0.001 0.001 186.187 186.187 dbm_multiply 216 17.5 186.184 186.184 186.184 186.184 hfx_ri_update_ks_Pmat_KS 63 11.6 0.000 0.000 181.714 181.714 dbt_tas_mm_2 91 16.5 0.001 0.001 172.554 172.554 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 134.662 134.662 init_scf_loop 2 5.0 0.000 0.000 78.054 78.054 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 70.115 70.115 hfx_ri_update_forces 1 7.0 1.587 1.587 36.406 36.406 hfx_ri_forces_Pmat_3c 1 8.0 4.767 4.767 18.756 18.756 dbt_copy 409 11.7 0.068 0.068 16.410 16.410 precalc_derivatives 1 8.0 2.350 2.350 13.855 13.855 dbt_reshape 132 13.2 7.446 7.446 11.592 11.592 dbt_tas_mm_3T 77 17.1 0.000 0.000 9.439 9.439 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 9.273 9.273 build_3c_derivatives 3 9.0 2.584 2.584 7.853 7.853 dbt_tas_reserve_blocks_index 1229 15.4 0.991 0.991 7.717 7.717 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 7.387 7.387 dbm_reserve_blocks 1345 16.4 6.817 6.817 6.817 6.817 dbt_reserve_blocks_index 818 14.4 0.106 0.106 6.072 6.072 dbt_reserve_blocks_index_array 795 13.4 0.006 0.006 5.982 5.982 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.024 43.618 43.628 qs_forces 1 2.0 0.000 0.000 43.488 43.488 rebuild_ks_matrix 7 6.6 0.000 0.000 42.820 42.821 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 42.820 42.821 hfx_ks_matrix 7 8.6 0.000 0.000 41.865 41.871 dbt_total 783 11.1 0.005 0.008 37.182 37.186 dbt_contract 207 12.4 0.022 0.024 28.700 28.710 hfx_ri_update_ks 7 9.6 0.000 0.000 25.807 25.808 hfx_ri_update_ks_Pmat 7 10.6 1.165 1.236 25.807 25.807 dbt_tas_total 317 14.0 0.040 0.053 25.188 25.189 dbt_tas_multiply 216 13.5 0.001 0.001 25.096 25.097 qs_energies 1 3.0 0.000 0.000 24.174 24.174 scf_env_do_scf 1 4.0 0.000 0.001 24.040 24.040 qs_ks_update_qs_env 8 6.0 0.000 0.000 23.515 23.516 dbt_tas_dbm 216 15.5 0.001 0.001 21.111 21.112 dbm_multiply 216 17.5 18.881 19.917 18.881 19.917 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 19.305 19.305 hfx_ri_update_forces 1 7.0 0.055 0.060 16.057 16.063 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 13.659 13.659 hfx_ri_update_ks_Pmat_KS 63 11.6 0.000 0.001 11.965 11.965 hfx_ri_forces_Pmat_3c 1 8.0 0.150 0.166 11.533 11.533 init_scf_loop 2 5.0 0.000 0.000 10.380 10.380 dbt_tas_mm_2 91 16.5 0.001 0.001 10.216 10.217 dbt_copy 497 12.3 0.010 0.011 7.983 8.405 dbt_reshape 365 13.6 3.424 3.620 6.236 6.534 mp_sync 2665 13.0 4.172 6.304 4.172 6.304 dbt_tas_mm_3T 77 17.1 0.000 0.000 5.142 5.751 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 4.704 4.704 dbt_tas_mm_3N 37 15.4 0.000 0.000 3.992 4.222 precalc_derivatives 1 8.0 0.077 0.084 3.449 3.449 dbt_tas_reserve_blocks_index 1356 15.8 0.886 0.912 2.966 3.396 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.046 3.046 dbt_reserve_blocks_index 1051 14.7 0.099 0.105 2.491 2.847 dbt_reserve_blocks_index_array 1028 13.8 0.005 0.006 2.464 2.821 dbm_reserve_blocks 1481 16.7 2.147 2.556 2.147 2.556 dbt_crop 372 13.7 1.677 1.731 2.423 2.554 mp_waitall_2 1138 16.4 1.995 2.081 1.995 2.081 build_3c_derivatives 3 9.0 0.221 0.239 1.904 1.910 dbt_communicate_buffer 365 14.6 0.012 0.013 1.781 1.853 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.583 1.583 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.529 1.532 convert_to_new_pgrid 648 15.5 0.035 0.072 1.212 1.358 dbt_tas_copy 132 12.5 0.555 0.619 1.089 1.239 dbm_copy 452 16.3 1.051 1.193 1.051 1.193 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=41.940999999999974, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=186.184, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=36.596, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=7.446, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=6.817, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.767, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=11.684000000000005, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=18.881, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.165, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.424, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.147, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.15, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=4.172, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=1.995, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 178.923 178.923 qs_energies 1 2.0 0.000 0.000 178.732 178.732 mp2_main 1 3.0 0.000 0.000 174.002 174.002 mp2_gpw_main 1 4.0 0.001 0.001 173.551 173.551 mp2_ri_gpw_compute_in 1 5.0 0.380 0.380 126.324 126.324 mp2_ri_gpw_compute_in_loop 1 6.0 0.010 0.010 117.588 117.588 mp2_eri_3c_integrate_gpw 2656 7.0 0.014 0.014 86.195 86.195 integrate_v_rspace 2666 8.0 0.649 0.649 73.042 73.042 grid_integrate_task_list 2666 9.0 70.401 70.401 70.401 70.401 mp2_ri_gpw_compute_en 1 5.0 0.089 0.089 47.202 47.202 mp2_ri_gpw_compute_en_RI_loop 1 6.0 9.704 9.704 45.300 45.300 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.152 2.152 28.183 28.183 local_gemm 2080 8.0 26.031 26.031 26.031 26.031 dbcsr_multiply_generic 5322 8.0 0.196 0.196 20.525 20.525 ao_to_mo_and_store_B_mult_1 2656 7.0 0.009 0.009 20.504 20.504 pw_transfer 63872 10.6 1.021 1.021 11.961 11.961 calculate_wavefunction 2656 8.0 8.077 8.077 11.790 11.790 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.256 2.256 10.779 10.779 fft_wrap_pw1pw2 53228 11.4 0.108 0.108 10.688 10.688 multiply_cannon 5322 9.0 0.426 0.426 10.253 10.253 copy_dbcsr_to_fm 2679 8.0 0.026 0.026 9.104 9.104 multiply_cannon_loop 5322 10.0 0.108 0.108 8.918 8.918 get_2c_integrals 1 6.0 0.000 0.000 8.355 8.355 make_m2s 10644 9.0 0.063 0.063 8.052 8.052 make_images 10644 10.0 3.196 3.196 7.754 7.754 fft_wrap_pw1pw2_20 21271 12.4 0.484 0.484 7.558 7.558 compute_2c_integrals 1 7.0 0.005 0.005 7.525 7.525 multiply_cannon_multrec 5322 11.0 7.483 7.483 7.524 7.524 compute_2c_integrals_loop_lm 1 8.0 0.008 0.008 7.513 7.513 mp2_eri_2c_integrate_gpw 1 9.0 0.788 0.788 7.506 7.506 dbcsr_complete_redistribute 2689 9.0 1.186 1.186 7.402 7.402 dbcsr_finalize 10708 9.5 0.179 0.179 6.706 6.706 fft3d_s 53229 13.4 6.596 6.596 6.635 6.635 dbcsr_merge_all 8011 10.3 4.322 4.322 6.061 6.061 mp2_ri_gpw_compute_en_ener 2080 7.0 5.501 5.501 5.501 5.501 scf_env_do_scf 1 3.0 0.000 0.000 4.331 4.331 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.330 4.330 potential_pw2rs 5322 10.0 0.147 0.147 3.939 3.939 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.026 34.713 34.723 qs_energies 1 2.0 0.000 0.000 34.612 34.613 mp2_main 1 3.0 0.000 0.001 32.815 32.816 mp2_gpw_main 1 4.0 0.001 0.001 32.722 32.723 mp2_ri_gpw_compute_in 1 5.0 0.051 0.051 16.985 17.289 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 15.817 16.125 mp2_ri_gpw_compute_en 1 5.0 0.201 0.208 15.651 16.023 mp2_ri_gpw_compute_en_RI_loop 1 6.0 1.365 1.577 14.536 14.544 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 13.749 14.020 integrate_v_rspace 93 8.1 0.101 0.111 13.606 13.874 grid_integrate_task_list 93 9.1 13.320 13.601 13.320 13.601 mp2_ri_gpw_compute_en_expansio 65 7.0 0.100 0.121 10.431 10.598 local_gemm 65 8.0 10.331 10.486 10.331 10.486 mp2_ri_gpw_compute_en_comm 20 7.0 0.071 0.094 2.406 2.924 mp_sendrecv_dm3 1240 8.0 1.894 2.601 1.894 2.601 dbcsr_multiply_generic 176 8.0 0.008 0.008 1.780 2.031 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.764 2.013 scf_env_do_scf 1 3.0 0.000 0.000 1.681 1.682 scf_env_do_scf_inner_loop 10 4.0 0.000 0.001 1.681 1.682 multiply_cannon 176 9.0 0.016 0.018 1.044 1.135 get_2c_integrals 1 6.0 0.000 0.000 1.095 1.124 multiply_cannon_loop 176 10.0 0.002 0.002 0.987 1.074 multiply_cannon_multrec 246 11.0 0.847 0.895 0.852 0.901 make_m2s 352 9.0 0.003 0.003 0.700 0.879 make_images 352 10.0 0.050 0.051 0.688 0.867 compute_2c_integrals 1 7.0 0.002 0.003 0.820 0.835 qs_scf_new_mos 10 5.0 0.000 0.000 0.759 0.766 compute_2c_integrals_loop_lm 1 8.0 0.001 0.003 0.738 0.756 pw_transfer 2120 10.5 0.043 0.045 0.744 0.755 mp2_eri_2c_integrate_gpw 1 9.0 0.197 0.206 0.736 0.755 eigensolver 11 5.8 0.001 0.001 0.724 0.726 mp_min_d 2 7.0 0.323 0.708 0.323 0.708 fft_wrap_pw1pw2 1768 11.4 0.004 0.005 0.688 0.698 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=57.227000000000004, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=70.401, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=26.031, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=9.704, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=8.077, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=7.483, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=6.956000000000003, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.32, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=10.331, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=1.365, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.847, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=1.894, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.087 0.087 127.076 127.076 qs_energies 1 2.0 0.000 0.000 125.618 125.618 scf_env_do_scf 1 3.0 0.000 0.000 118.930 118.930 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 118.930 118.930 qs_ks_update_qs_env 15 5.0 0.000 0.000 49.471 49.471 rebuild_ks_matrix 15 6.0 0.000 0.000 49.257 49.257 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 49.257 49.257 qs_scf_new_mos 15 5.0 0.000 0.000 43.893 43.893 eigensolver 15 6.0 0.002 0.002 35.952 35.952 qs_vxc_create 15 8.0 0.048 0.048 33.733 33.733 calculate_dispersion_nonloc 15 9.0 6.948 6.948 29.482 29.482 pw_transfer 1191 10.0 0.063 0.063 23.026 23.026 fft_wrap_pw1pw2 1086 11.0 0.008 0.008 22.841 22.841 qs_rho_update_rho_low 16 5.0 0.000 0.000 22.530 22.530 calculate_rho_elec 16 6.0 0.218 0.218 22.530 22.530 cp_fm_diag_elpa 15 7.0 0.000 0.000 22.242 22.242 cp_fm_diag_elpa_base 15 8.0 19.623 19.623 22.241 22.241 grid_collocate_task_list 16 7.0 20.953 20.953 20.953 20.953 fft_wrap_pw1pw2_150 765 12.0 3.610 3.610 16.606 16.606 sum_up_and_integrate 15 8.0 0.148 0.148 14.383 14.383 integrate_v_rspace 15 9.0 0.019 0.019 14.235 14.235 grid_integrate_task_list 15 10.0 13.697 13.697 13.697 13.697 cp_fm_cholesky_restore 45 7.0 11.387 11.387 11.387 11.387 fft3d_s 1087 13.0 10.185 10.185 10.193 10.193 pw_scatter_s 585 13.1 7.103 7.103 7.103 7.103 fft_wrap_pw1pw2_200 197 12.3 0.701 0.701 5.921 5.921 copy_dbcsr_to_fm 16 5.9 0.000 0.000 5.736 5.736 dbcsr_complete_redistribute 46 8.3 2.365 2.365 5.687 5.687 cp_fm_upper_to_full 30 8.0 4.940 4.940 4.940 4.940 vdW_energy 15 10.0 4.553 4.553 4.553 4.553 gspace_mixing 14 5.0 0.171 0.171 4.246 4.246 xc_vxc_pw_create 15 9.0 0.218 0.218 4.204 4.204 broyden_mixing 14 6.0 3.629 3.629 3.629 3.629 init_scf_run 1 3.0 0.000 0.000 3.185 3.185 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.032 3.032 xc_pw_derive 90 11.0 0.001 0.001 2.771 2.771 calculate_dm_sparse 15 6.0 0.019 0.019 2.571 2.571 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.031 66.047 66.058 qs_energies 1 2.0 0.000 0.000 65.754 65.759 scf_env_do_scf 1 3.0 0.000 0.000 61.711 61.712 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 61.711 61.712 qs_ks_update_qs_env 15 5.0 0.000 0.000 24.694 24.710 rebuild_ks_matrix 15 6.0 0.000 0.000 24.661 24.676 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.003 24.661 24.676 qs_rho_update_rho_low 16 5.0 0.000 0.000 22.818 22.824 calculate_rho_elec 16 6.0 0.007 0.007 22.818 22.824 grid_collocate_task_list 16 7.0 19.593 20.334 19.593 20.334 qs_scf_new_mos 15 5.0 0.000 0.000 14.737 14.872 sum_up_and_integrate 15 8.0 0.014 0.019 14.219 14.277 integrate_v_rspace 15 9.0 0.001 0.001 14.204 14.267 eigensolver 15 6.0 0.001 0.002 13.438 13.484 grid_integrate_task_list 15 10.0 13.156 13.396 13.156 13.396 qs_vxc_create 15 8.0 0.001 0.001 10.115 10.127 cp_fm_diag_elpa 15 7.0 0.000 0.000 9.673 9.679 cp_fm_diag_elpa_base 15 8.0 9.523 9.563 9.668 9.668 calculate_dispersion_nonloc 15 9.0 0.911 0.930 8.075 8.104 pw_transfer 1191 10.0 0.080 0.094 7.988 8.052 fft_wrap_pw1pw2 1086 11.0 0.011 0.014 7.831 7.924 fft3d_ps 1086 13.0 2.567 2.929 5.374 5.903 fft_wrap_pw1pw2_150 765 12.0 0.582 0.717 5.006 5.040 cp_fm_cholesky_restore 45 7.0 3.618 3.696 3.618 3.696 rs_pw_transfer 158 9.4 0.001 0.002 2.944 3.315 density_rs2pw 16 7.0 0.001 0.001 3.013 3.295 mp_alltoall_z22v 1086 15.0 2.262 3.117 2.262 3.117 mp_waitany 520 11.3 2.093 2.996 2.093 2.996 fft_wrap_pw1pw2_200 197 12.3 0.425 0.534 2.704 2.743 rs_pw_transfer_RS2PW_200 18 8.8 0.025 0.030 1.434 2.727 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.437 2.438 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.139 2.338 xc_vxc_pw_create 15 9.0 0.015 0.020 2.039 2.084 x_to_yz 585 14.1 0.331 0.358 1.504 1.906 rs_pw_transfer_RS2PW_70 16 9.0 0.006 0.008 0.575 1.784 yz_to_x 501 13.9 0.189 0.218 1.278 1.697 rs_pw_transfer_RS2PW_30 16 9.0 0.008 0.009 0.663 1.638 mp_sendrecv_dv 1488 10.0 0.661 1.636 0.661 1.636 xc_pw_derive 90 11.0 0.001 0.001 1.398 1.483 vdW_energy 15 10.0 1.316 1.392 1.316 1.392 init_scf_run 1 3.0 0.000 0.000 1.376 1.377 build_core_ppnl 1 5.0 1.248 1.370 1.248 1.370 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=51.230999999999995, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=20.953, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=19.623, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.697, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.387, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=10.185, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=17.589999999999996, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=19.593, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=9.523, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.156, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.618, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=2.567, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.079 0.079 306.357 306.357 qs_energies 1 2.0 0.000 0.000 306.215 306.215 ls_scf 1 3.0 0.000 0.000 304.993 304.993 ls_scf_main 1 4.0 0.002 0.002 295.872 295.872 density_matrix_trs4 11 5.0 0.013 0.013 196.465 196.465 arnoldi_extremal 12 6.1 0.000 0.000 97.374 97.374 arnoldi_normal_ev 12 7.1 0.049 0.049 97.374 97.374 build_subspace 23 8.1 0.086 0.086 95.637 95.637 dbcsr_matrix_vector_mult 652 9.0 0.253 0.253 95.272 95.272 dbcsr_matrix_vector_mult_local 652 10.0 93.647 93.647 93.656 93.656 ls_scf_dm_to_ks 11 5.0 0.000 0.000 93.377 93.377 matrix_ls_to_qs 11 6.0 0.000 0.000 89.778 89.778 dbcsr_multiply_generic 185 6.1 0.988 0.988 81.703 81.703 dbcsr_complete_redistribute 23 7.5 34.113 34.113 47.173 47.173 dbcsr_copy_into_existing 11 7.0 46.841 46.841 46.841 46.841 multiply_cannon 185 7.1 0.391 0.391 46.698 46.698 matrix_decluster 11 7.0 0.000 0.000 42.935 42.935 multiply_cannon_loop 185 8.1 0.235 0.235 33.794 33.794 make_m2s 370 7.1 0.040 0.040 29.845 29.845 make_images 370 8.1 13.240 13.240 27.179 27.179 multiply_cannon_multrec 185 9.1 23.772 23.772 24.000 24.000 dbcsr_finalize 646 7.5 0.348 0.348 17.457 17.457 dbcsr_merge_all 597 8.5 2.709 2.709 16.078 16.078 setup_rec_index_2d 370 8.1 12.390 12.390 12.390 12.390 dbcsr_sort_indices 1103 9.9 11.842 11.842 11.842 11.842 tree_to_linear_d 110 9.4 11.783 11.783 11.783 11.783 quick_finalize 395 10.0 0.451 0.451 10.260 10.260 calculate_norms 370 9.1 9.559 9.559 9.559 9.559 dbcsr_special_finalize 370 9.1 0.002 0.002 9.510 9.510 ls_scf_init_scf 1 4.0 0.000 0.000 8.432 8.432 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.097 8.097 dbcsr_dot_sd 144 6.3 7.692 7.692 7.693 7.693 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.423 7.423 dbcsr_frobenius_norm 142 6.1 6.266 6.266 6.268 6.268 matrix_qs_to_ls 12 5.1 0.000 0.000 6.218 6.218 matrix_cluster 12 6.1 0.000 0.000 6.218 6.218 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.020 73.709 73.719 qs_energies 1 2.0 0.000 0.000 73.612 73.612 ls_scf 1 3.0 0.000 0.000 73.562 73.563 ls_scf_main 1 4.0 0.001 0.008 70.674 70.675 density_matrix_trs4 11 5.0 0.007 0.019 68.061 68.144 dbcsr_multiply_generic 185 6.1 0.060 0.076 64.716 65.016 multiply_cannon 185 7.1 0.036 0.040 54.303 54.982 multiply_cannon_loop 185 8.1 0.134 0.165 51.578 52.969 multiply_cannon_multrec 1480 9.1 31.834 36.416 32.146 36.743 mp_waitall_1 11936 10.3 16.464 20.189 16.464 20.189 multiply_cannon_metrocomm3 1480 9.1 0.016 0.021 9.616 15.131 make_m2s 370 7.1 0.036 0.039 7.481 7.545 make_images 370 8.1 0.636 0.679 7.348 7.415 multiply_cannon_metrocomm1 1480 9.1 0.009 0.012 3.813 7.163 calculate_norms 2960 9.1 5.802 6.603 5.802 6.603 make_images_data 370 9.1 0.010 0.013 3.325 3.636 hybrid_alltoall_any 393 9.9 0.229 1.314 2.911 3.099 mp_sum_l 1119 5.6 1.841 2.683 1.841 2.683 arnoldi_extremal 12 6.1 0.000 0.000 2.407 2.431 arnoldi_normal_ev 12 7.1 0.002 0.011 2.406 2.430 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.269 2.377 build_subspace 23 8.1 0.022 0.027 2.310 2.312 ls_scf_init_scf 1 4.0 0.000 0.000 2.230 2.230 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.204 2.211 dbcsr_complete_redistribute 23 7.5 1.222 1.294 2.034 2.147 matrix_ls_to_qs 11 6.0 0.000 0.000 1.979 2.097 make_images_pack 370 9.1 1.870 2.065 1.874 2.069 dbcsr_matrix_vector_mult 652 9.0 0.014 0.049 1.982 2.040 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 2.014 2.016 matrix_decluster 11 7.0 0.000 0.000 1.838 1.956 buffer_matrices_ensure_size 370 8.1 1.654 1.834 1.654 1.834 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 0.989 1.735 dbcsr_matrix_vector_mult_local 652 10.0 1.643 1.702 1.644 1.704 dbcsr_finalize 646 7.5 0.008 0.009 1.510 1.620 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=85.18500000000003, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=93.647, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=46.841, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=34.113, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=23.772, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images", label="make_images", y=13.24, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=9.559, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=12.397000000000006, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.643, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.222, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=31.834, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images", label="make_images", y=0.636, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.802, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=1.841, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=16.464, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.87, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.008 72.951 72.951 lib_test 1 2.0 0.000 0.000 72.942 72.942 dbcsr_run_tests 3 3.0 0.002 0.002 72.942 72.942 test_multiplies_multiproc 3 4.0 0.001 0.001 56.933 56.933 dbcsr_redistribute 9 5.0 37.277 37.277 38.897 38.897 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.679 16.679 dbcsr_make_random_matrix 9 4.0 12.941 12.941 15.898 15.898 multiply_cannon 9 6.0 0.002 0.002 11.761 11.761 multiply_cannon_loop 9 7.0 0.025 0.025 11.400 11.400 multiply_cannon_multrec 9 8.0 11.375 11.375 11.375 11.375 dbcsr_finalize 27 5.7 0.016 0.016 5.559 5.559 dbcsr_merge_all 18 6.5 2.046 2.046 4.831 4.831 dbcsr_data_release 975 7.6 2.826 2.826 2.826 2.826 tree_to_linear_d 9 7.0 1.916 1.916 1.916 1.916 make_m2s 18 6.0 0.001 0.001 1.716 1.716 make_images 18 7.0 0.584 0.584 1.641 1.641 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.010 17.493 17.497 lib_test 1 2.0 0.000 0.000 17.465 17.483 dbcsr_run_tests 3 3.0 0.000 0.001 17.464 17.483 test_multiplies_multiproc 3 4.0 0.000 0.002 16.638 16.690 dbcsr_multiply_generic 9 5.0 0.001 0.013 15.245 15.347 multiply_cannon 9 6.0 0.001 0.002 13.354 13.703 multiply_cannon_loop 9 7.0 0.002 0.002 13.066 13.430 multiply_cannon_multrec 72 8.0 10.806 11.468 10.806 11.469 mp_waitall_1 576 9.2 2.589 3.366 2.589 3.366 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 1.982 2.909 mp_sum_l 390 2.5 0.510 1.000 0.510 1.000 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.492 0.982 dbcsr_data_release 444 7.6 0.783 0.896 0.783 0.896 dbcsr_make_random_matrix 9 4.0 0.662 0.672 0.798 0.835 dbcsr_finalize 27 5.7 0.000 0.000 0.678 0.802 multiply_cannon_metrocomm3 72 8.0 0.000 0.000 0.271 0.770 dbcsr_destroy 111 5.9 0.000 0.000 0.634 0.763 make_m2s 18 6.0 0.001 0.001 0.713 0.740 make_images 18 7.0 0.021 0.025 0.710 0.737 dbcsr_merge_all 18 6.5 0.098 0.124 0.527 0.625 make_images_data 18 8.0 0.000 0.001 0.372 0.435 dbcsr_redistribute 9 5.0 0.233 0.271 0.398 0.429 hybrid_alltoall_any 18 9.0 0.032 0.157 0.315 0.390 dbcsr_data_copy_aa2 18 7.5 0.316 0.371 0.316 0.371 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.48599999999999, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=37.277, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.941, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.375, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.826, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=2.046, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=1.8119999999999994, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.233, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.662, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=10.806, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.783, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.098, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=2.589, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.51, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.059 0.059 134.057 134.057 qs_mol_dyn_low 1 2.0 0.003 0.003 132.579 132.579 velocity_verlet 5 3.0 0.008 0.008 107.982 107.982 qmmm_el_coupling 6 3.8 0.000 0.000 87.988 87.988 qmmm_elec_with_gaussian 6 4.8 0.013 0.013 87.984 87.984 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 87.346 87.346 qmmm_elec_gaussian_low_G 6 6.8 86.424 86.424 86.424 86.424 qs_forces 6 3.8 0.001 0.001 35.798 35.798 qs_energies 6 4.8 0.000 0.000 31.783 31.783 scf_env_do_scf 6 5.8 0.001 0.001 29.451 29.451 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 25.691 25.691 rebuild_ks_matrix 45 8.4 0.000 0.000 24.805 24.805 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 24.805 24.805 qs_ks_update_qs_env 45 7.8 0.000 0.000 21.221 21.221 pw_transfer 966 12.3 0.056 0.056 17.745 17.745 fft_wrap_pw1pw2 801 13.6 0.006 0.006 17.535 17.535 fft_wrap_pw1pw2_150 507 15.2 2.335 2.335 17.119 17.119 qs_vxc_create 45 10.4 0.001 0.001 13.723 13.723 xc_vxc_pw_create 45 11.4 0.694 0.694 13.722 13.722 xc_pw_derive 270 13.4 0.002 0.002 9.679 9.679 fft3d_s 802 15.6 8.008 8.008 8.016 8.016 qs_rho_update_rho_low 45 7.9 0.000 0.000 7.312 7.312 calculate_rho_elec 45 8.9 0.564 0.564 7.312 7.312 xc_rho_set_and_dset_create 45 12.4 0.635 0.635 7.095 7.095 pw_scatter_s 429 15.8 5.878 5.878 5.878 5.878 xc_pw_divergence 45 12.4 0.001 0.001 5.858 5.858 qmmm_forces 6 3.8 0.002 0.002 5.567 5.567 qmmm_forces_with_gaussian 6 4.8 0.019 0.019 5.230 5.230 pw_integral_ab 2539 7.4 4.591 4.591 4.591 4.591 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.498 4.498 qs_ks_ddapc 45 10.4 0.001 0.001 4.239 4.239 init_scf_loop 6 6.8 0.000 0.000 3.755 3.755 qmmm_forces_gaussian_low_G 6 6.8 3.726 3.726 3.726 3.726 sum_up_and_integrate 45 10.4 0.508 0.508 3.627 3.627 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.591 3.591 density_rs2pw 45 9.9 0.002 0.002 3.408 3.408 grid_collocate_task_list 45 9.9 3.340 3.340 3.340 3.340 integrate_v_rspace 45 11.4 0.006 0.006 3.119 3.119 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.041 0.059 61.115 61.125 qs_mol_dyn_low 1 2.0 0.003 0.004 59.907 59.970 qs_forces 6 3.8 0.001 0.001 44.804 44.804 qs_energies 6 4.8 0.000 0.000 42.833 42.833 scf_env_do_scf 6 5.8 0.000 0.001 41.821 41.821 scf_env_do_scf_inner_loop 113 6.2 0.003 0.026 40.158 40.159 rebuild_ks_matrix 119 8.1 0.000 0.000 30.026 30.036 qs_ks_build_kohn_sham_matrix 119 9.1 0.015 0.017 30.025 30.035 qs_ks_update_qs_env 119 7.3 0.001 0.001 28.359 28.370 velocity_verlet 5 3.0 0.002 0.004 24.271 24.274 pw_transfer 2446 12.3 0.181 0.196 21.680 22.167 fft_wrap_pw1pw2 2059 13.4 0.022 0.027 21.278 21.807 fft_wrap_pw1pw2_150 1321 14.9 3.444 4.093 20.513 21.038 qs_vxc_create 119 10.1 0.002 0.003 16.424 16.443 xc_vxc_pw_create 119 11.1 0.153 0.219 16.421 16.441 fft3d_ps 2059 15.4 7.542 8.608 14.033 15.488 xc_pw_derive 714 13.1 0.007 0.009 12.684 12.937 qs_rho_update_rho_low 119 7.3 0.001 0.001 11.753 11.759 calculate_rho_elec 119 8.3 0.049 0.056 11.752 11.758 sum_up_and_integrate 119 10.1 0.074 0.091 9.912 9.946 integrate_v_rspace 119 11.1 0.003 0.004 9.838 9.870 xc_pw_divergence 119 12.1 0.004 0.005 8.345 8.581 xc_rho_set_and_dset_create 119 12.1 0.341 0.424 7.666 7.875 qmmm_forces 6 3.8 0.002 0.002 7.725 7.725 qmmm_forces_with_gaussian 6 4.8 0.008 0.010 7.314 7.515 density_rs2pw 119 9.3 0.006 0.007 6.868 7.146 rs_pw_transfer 988 11.5 0.012 0.015 6.672 7.015 mp_alltoall_z22v 2059 17.4 5.070 6.670 5.070 6.670 qmmm_el_coupling 6 3.8 0.000 0.000 6.385 6.578 qmmm_elec_with_gaussian 6 4.8 0.003 0.004 6.383 6.576 potential_pw2rs 119 12.1 0.006 0.007 5.923 5.952 grid_collocate_task_list 119 9.3 4.637 4.900 4.637 4.900 x_to_yz 1095 16.8 0.830 0.903 3.408 4.147 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.087 4.123 yz_to_x 964 16.0 0.537 0.656 3.029 4.015 grid_integrate_task_list 119 12.1 3.612 3.830 3.612 3.830 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.493 3.537 qmmm_forces_gaussian_low_G 6 6.8 3.362 3.401 3.362 3.401 mp_waitany 4028 12.8 2.562 3.206 2.562 3.206 qmmm_elec_gaussian_low_G 6 6.8 2.894 2.929 2.894 2.929 pw_restrict_s3 18 5.8 1.405 1.470 2.580 2.796 pw_gather_p 964 15.0 2.158 2.652 2.158 2.652 rs_pw_transfer_PW2RS_150 125 13.9 0.716 0.878 2.509 2.597 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.175 2.368 pw_prolongate_s3 18 6.8 1.192 1.268 2.175 2.368 qs_ks_ddapc 119 10.1 0.002 0.003 2.218 2.354 rs_pw_transfer_RS2PW_150 125 11.2 0.566 0.662 1.896 2.236 qs_scf_new_mos 113 7.2 0.000 0.000 2.085 2.091 qs_scf_loop_do_ot 113 8.2 0.000 0.001 2.085 2.090 ot_scf_mini 113 9.2 0.001 0.001 2.002 2.006 dbcsr_multiply_generic 2588 12.3 0.059 0.060 1.860 1.902 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.686 1.687 init_scf_loop 6 6.8 0.000 0.000 1.660 1.660 pw_integral_ab 2761 7.7 1.133 1.292 1.532 1.653 mp_sum_d 5822 12.2 1.058 1.630 1.058 1.630 pw_scatter_p 1095 15.8 1.572 1.617 1.572 1.617 mp_waitall_1 177795 16.4 1.230 1.495 1.230 1.495 rs_pw_transfer_PW2RS_40 119 14.1 0.224 0.264 1.048 1.313 mp_sum_dm3 33 5.7 1.250 1.309 1.250 1.309 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=19.754999999999995, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=86.424, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.008, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.878, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.591, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.726, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.34, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=2.335, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=29.421, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.894, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.133, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.362, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.637, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=3.444, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.542, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.612, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.07, yerr=0.0 Summary: Performance test took 35 minutes. Status: OK Removing intermediate container c6d52bb8feaf ---> 792ccfd2eb76 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 70e0301fa30f Removing intermediate container 70e0301fa30f ---> 785c8b7aaed5 Step 42/42 : ENTRYPOINT [] ---> Running in 802f2bf1ba17 Removing intermediate container 802f2bf1ba17 ---> f98085822ed3 [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built f98085822ed3 Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2023-03-24 20:18:28+00:00