StartDate: 2023-06-26 12:44:51+00:00 CpuId: 32x AMD EPYC (3rd Gen) (Milan) [Zen 3], 7nm (SMT disabled) CommitSHA: c8a38c8314c6f43066d6e1b608696bdbcb5b4f35 CommitTime: 2023-06-26 12:10:15 +0200 CommitAuthor: Frederick Stein CommitSubject: Fix some initialization of allocatable components #################### Building Image cp2k-perf-openmp #################### Dockerfile: /tools/docker/Dockerfile.test_performance Build-Path: / Build-Args: GIT_COMMIT_SHA=c8a38c8314c6f43066d6e1b608696bdbcb5b4f35 Build-Cache: Yes Populating docker build cache... done. DEPRECATED: The legacy builder is deprecated and will be removed in a future release. BuildKit is currently disabled; enable it by removing the DOCKER_BUILDKIT=0 environment-variable. Sending build context to Docker daemon 371.1MB Step 1/42 : FROM ubuntu:22.04 22.04: Pulling from library/ubuntu 6b851dcae6ca: Pulling fs layer 6b851dcae6ca: Download complete 6b851dcae6ca: Pull complete Digest: sha256:6120be6a2b7ce665d0cbddc3ce6eae60fe94637c6a66985312d1f02f63cc0bcd Status: Downloaded newer image for ubuntu:22.04 ---> 99284ca6cea0 Step 2/42 : WORKDIR /opt/cp2k-toolchain ---> Using cache ---> a4b0c172f62f Step 3/42 : COPY ./tools/toolchain/install_requirements*.sh ./ ---> Using cache ---> 6bb3b047e99c Step 4/42 : RUN ./install_requirements.sh ubuntu:22.04 ---> Using cache ---> 73d80ef9e4a3 Step 5/42 : RUN mkdir scripts ---> Using cache ---> b7df9fba15d6 Step 6/42 : COPY ./tools/toolchain/scripts/VERSION ./tools/toolchain/scripts/parse_if.py ./tools/toolchain/scripts/tool_kit.sh ./tools/toolchain/scripts/common_vars.sh ./tools/toolchain/scripts/signal_trap.sh ./tools/toolchain/scripts/get_openblas_arch.sh ./scripts/ ---> Using cache ---> 5c15e99590c4 Step 7/42 : COPY ./tools/toolchain/install_cp2k_toolchain.sh . ---> Using cache ---> d49e3391ce09 Step 8/42 : RUN ./install_cp2k_toolchain.sh --install-all --with-gcc=system --dry-run ---> Using cache ---> fda0d3a90bfc Step 9/42 : COPY ./tools/toolchain/scripts/stage0/ ./scripts/stage0/ ---> Using cache ---> 11c1a44a6047 Step 10/42 : RUN ./scripts/stage0/install_stage0.sh && rm -rf ./build ---> Using cache ---> c0966e1e9481 Step 11/42 : COPY ./tools/toolchain/scripts/stage1/ ./scripts/stage1/ ---> Using cache ---> 2ca131548388 Step 12/42 : RUN ./scripts/stage1/install_stage1.sh && rm -rf ./build ---> Using cache ---> e81a3481b9f8 Step 13/42 : COPY ./tools/toolchain/scripts/stage2/ ./scripts/stage2/ ---> Using cache ---> e4cc5157ace6 Step 14/42 : RUN ./scripts/stage2/install_stage2.sh && rm -rf ./build ---> Using cache ---> fce6a9f1c0f0 Step 15/42 : COPY ./tools/toolchain/scripts/stage3/ ./scripts/stage3/ ---> Using cache ---> 20082ea74ce0 Step 16/42 : RUN ./scripts/stage3/install_stage3.sh && rm -rf ./build ---> Using cache ---> 1ed0f8ec5ae4 Step 17/42 : COPY ./tools/toolchain/scripts/stage4/ ./scripts/stage4/ ---> Using cache ---> 0d45c2699d8d Step 18/42 : RUN ./scripts/stage4/install_stage4.sh && rm -rf ./build ---> Using cache ---> 786587ed7b92 Step 19/42 : COPY ./tools/toolchain/scripts/stage5/ ./scripts/stage5/ ---> Using cache ---> b117d04fe9c9 Step 20/42 : RUN ./scripts/stage5/install_stage5.sh && rm -rf ./build ---> Using cache ---> 2089654a5431 Step 21/42 : COPY ./tools/toolchain/scripts/stage6/ ./scripts/stage6/ ---> Using cache ---> a2413d2964a0 Step 22/42 : RUN ./scripts/stage6/install_stage6.sh && rm -rf ./build ---> Using cache ---> f37288abb7ef Step 23/42 : COPY ./tools/toolchain/scripts/stage7/ ./scripts/stage7/ ---> Using cache ---> bad1ddb6076e Step 24/42 : RUN ./scripts/stage7/install_stage7.sh && rm -rf ./build ---> Using cache ---> 1281072d1860 Step 25/42 : COPY ./tools/toolchain/scripts/stage8/ ./scripts/stage8/ ---> Using cache ---> dd1a8cf997d1 Step 26/42 : RUN ./scripts/stage8/install_stage8.sh && rm -rf ./build ---> Using cache ---> 651a0762955e Step 27/42 : COPY ./tools/toolchain/scripts/arch_base.tmpl ./tools/toolchain/scripts/generate_arch_files.sh ./scripts/ ---> Using cache ---> 8754272b0a19 Step 28/42 : RUN ./scripts/generate_arch_files.sh && rm -rf ./build ---> Using cache ---> adb5af6166e4 Step 29/42 : WORKDIR /opt/cp2k ---> Using cache ---> e41b12ebf40c Step 30/42 : COPY ./Makefile . ---> Using cache ---> f6260b8939fb Step 31/42 : COPY ./src ./src ---> Using cache ---> 710aa42aa03d Step 32/42 : COPY ./exts ./exts ---> Using cache ---> 72b5f15890e7 Step 33/42 : COPY ./tools/build_utils ./tools/build_utils ---> Using cache ---> c98e8d9f73a6 Step 34/42 : RUN /bin/bash -c " mkdir -p arch && ln -vs /opt/cp2k-toolchain/install/arch/local.psmp ./arch/ && echo 'Compiling cp2k...' && source /opt/cp2k-toolchain/install/setup && ( make -j ARCH=local VERSION=psmp &> /dev/null || true ) && rm -rf lib obj && ( [ ! -f ./exe/local/cp2k.psmp ] || ldd ./exe/local/cp2k.psmp | grep -q libmpi )" ---> Running in d125aa1737ff './arch/local.psmp' -> '/opt/cp2k-toolchain/install/arch/local.psmp' Compiling cp2k... Removing intermediate container d125aa1737ff ---> 535cc15959cb Step 35/42 : COPY ./data ./data ---> cabab19196aa Step 36/42 : COPY ./tests ./tests ---> c48f30b5da6f Step 37/42 : COPY ./tools/regtesting ./tools/regtesting ---> c8662460958e Step 38/42 : COPY ./benchmarks ./benchmarks ---> c89cfe2fd0af Step 39/42 : COPY ./tools/docker/scripts/test_performance.sh ./tools/docker/scripts/plot_performance.py ./ ---> 3bf30966c6db Step 40/42 : RUN ./test_performance.sh "local" 2>&1 | tee report.log ---> Running in a567ee261e75 ========== Compiling CP2K ========== Compiling cp2k... done. Checking benchmark inputs... Found 70 input files and 0 errors. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.034 0.034 88.885 88.885 qs_mol_dyn_low 1 2.0 0.003 0.003 88.233 88.233 qs_forces 11 3.9 0.001 0.001 88.192 88.192 qs_energies 11 4.9 0.001 0.001 82.162 82.162 scf_env_do_scf 11 5.9 0.001 0.001 70.960 70.960 velocity_verlet 10 3.0 0.002 0.002 57.489 57.489 scf_env_do_scf_inner_loop 108 6.5 0.012 0.012 54.275 54.275 rebuild_ks_matrix 119 8.3 0.001 0.001 20.408 20.408 qs_ks_build_kohn_sham_matrix 119 9.3 0.013 0.013 20.407 20.407 dbcsr_multiply_generic 2286 12.5 0.162 0.162 19.907 19.907 qs_scf_new_mos 108 7.5 0.001 0.001 19.832 19.832 qs_scf_loop_do_ot 108 8.5 0.001 0.001 19.832 19.832 qs_rho_update_rho_low 119 7.7 0.001 0.001 19.440 19.440 calculate_rho_elec 119 8.7 0.966 0.966 19.439 19.439 qs_ks_update_qs_env 119 7.6 0.001 0.001 18.808 18.808 ot_scf_mini 108 9.5 0.002 0.002 18.478 18.478 init_scf_loop 11 6.9 0.000 0.000 16.532 16.532 grid_collocate_task_list 119 9.7 15.069 15.069 15.069 15.069 prepare_preconditioner 11 7.9 0.000 0.000 14.003 14.003 make_preconditioner 11 8.9 0.000 0.000 14.003 14.003 sum_up_and_integrate 119 10.3 0.790 0.790 13.245 13.245 make_full_inverse_cholesky 11 9.9 0.024 0.024 12.850 12.850 integrate_v_rspace 119 11.3 0.090 0.090 12.454 12.454 ot_mini 108 10.5 0.001 0.001 11.963 11.963 make_m2s 4572 13.5 0.047 0.047 10.881 10.881 grid_integrate_task_list 119 12.3 10.456 10.456 10.456 10.456 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 6.659 6.659 qs_ot_get_derivative 108 11.5 0.001 0.001 6.245 6.245 pw_transfer 1439 11.6 0.068 0.068 5.999 5.999 dbcsr_make_dense_low 5837 15.5 0.081 0.081 5.900 5.900 make_dense_data 5837 16.5 5.207 5.207 5.805 5.805 fft_wrap_pw1pw2 1201 12.6 0.007 0.007 5.783 5.783 ot_diis_step 108 11.5 0.004 0.004 5.714 5.714 make_images 4572 14.5 2.118 2.118 5.374 5.374 dbcsr_make_images_dense 3978 14.8 0.017 0.017 5.201 5.201 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 5.193 5.193 apply_single 119 13.6 0.000 0.000 5.193 5.193 fft_wrap_pw1pw2_140 487 13.2 0.444 0.444 4.963 4.963 multiply_cannon 2286 13.5 0.182 0.182 4.963 4.963 multiply_cannon_loop 2286 14.5 0.046 0.046 4.486 4.486 cp_fm_cholesky_decompose 22 10.9 4.449 4.449 4.449 4.449 multiply_cannon_multrec 2286 15.5 4.388 4.388 4.439 4.439 cp_fm_cholesky_invert 11 10.9 4.216 4.216 4.216 4.216 init_scf_run 11 5.9 0.002 0.002 3.818 3.818 scf_env_initial_rho_setup 11 6.9 0.001 0.001 3.817 3.817 dbcsr_complete_redistribute 329 12.2 1.903 1.903 3.746 3.746 dbcsr_copy 2102 12.0 0.242 0.242 3.712 3.712 qs_env_update_s_mstruct 11 6.9 0.000 0.000 3.661 3.661 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.464 3.464 dbcsr_copy_into_existing 22 7.9 3.413 3.413 3.413 3.413 density_rs2pw 119 9.7 0.005 0.005 3.404 3.404 wfi_extrapolate 11 7.9 0.001 0.001 3.346 3.346 qs_create_task_list 11 7.9 0.000 0.000 3.247 3.247 generate_qs_task_list 11 8.9 1.955 1.955 3.247 3.247 copy_dbcsr_to_fm 153 11.3 0.002 0.002 3.107 3.107 qs_ot_get_p 119 10.4 0.001 0.001 2.981 2.981 fft3d_s 1202 14.6 2.892 2.892 2.898 2.898 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 2.580 2.580 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.565 2.565 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.366 2.366 dbcsr_data_release 279532 16.0 2.222 2.222 2.222 2.222 pw_poisson_solve 119 10.3 0.347 0.347 1.991 1.991 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.969 1.969 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 1.938 1.938 potential_pw2rs 119 12.3 0.046 0.046 1.909 1.909 copy_fm_to_dbcsr 176 11.2 0.001 0.001 1.863 1.863 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.010 0.022 48.368 48.378 qs_mol_dyn_low 1 2.0 0.003 0.004 48.119 48.124 qs_forces 11 3.9 0.001 0.002 48.078 48.079 qs_energies 11 4.9 0.001 0.001 45.004 45.007 scf_env_do_scf 11 5.9 0.000 0.002 41.292 41.292 scf_env_do_scf_inner_loop 108 6.5 0.003 0.020 38.133 38.133 velocity_verlet 10 3.0 0.002 0.003 28.593 28.594 rebuild_ks_matrix 119 8.3 0.001 0.001 17.278 17.343 qs_ks_build_kohn_sham_matrix 119 9.3 0.018 0.020 17.278 17.342 qs_ks_update_qs_env 119 7.6 0.001 0.001 15.346 15.407 dbcsr_multiply_generic 2286 12.5 0.079 0.082 14.687 14.824 qs_rho_update_rho_low 119 7.7 0.001 0.001 13.927 13.937 calculate_rho_elec 119 8.7 0.031 0.034 13.927 13.936 sum_up_and_integrate 119 10.3 0.035 0.038 12.562 12.583 integrate_v_rspace 119 11.3 0.004 0.005 12.527 12.548 qs_scf_new_mos 108 7.5 0.001 0.001 11.666 11.735 qs_scf_loop_do_ot 108 8.5 0.001 0.001 11.666 11.735 multiply_cannon 2286 13.5 0.170 0.177 10.906 11.132 ot_scf_mini 108 9.5 0.002 0.002 11.011 11.083 multiply_cannon_loop 2286 14.5 0.111 0.121 10.292 10.521 grid_collocate_task_list 119 9.7 9.090 9.422 9.090 9.422 mp_waitall_1 158411 16.6 8.434 8.720 8.434 8.720 grid_integrate_task_list 119 12.3 8.148 8.356 8.148 8.356 multiply_cannon_metrocomm3 18288 15.5 0.050 0.054 6.494 6.780 ot_mini 108 10.5 0.001 0.001 6.330 6.407 rs_pw_transfer 974 11.9 0.014 0.015 4.674 5.084 density_rs2pw 119 9.7 0.006 0.006 4.149 4.564 pw_transfer 1439 11.6 0.096 0.104 4.435 4.496 fft_wrap_pw1pw2 1201 12.6 0.010 0.011 4.266 4.327 fft_wrap_pw1pw2_140 487 13.2 0.723 0.768 3.693 3.804 potential_pw2rs 119 12.3 0.007 0.008 3.752 3.762 multiply_cannon_multrec 18288 15.5 3.388 3.500 3.400 3.513 qs_ot_get_derivative 108 11.5 0.001 0.001 3.210 3.280 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 3.118 3.179 apply_single 119 13.6 0.000 0.001 3.117 3.179 init_scf_loop 11 6.9 0.000 0.000 3.141 3.142 ot_diis_step 108 11.5 0.003 0.004 3.095 3.095 fft3d_ps 1201 14.6 1.439 1.580 2.752 2.879 make_m2s 4572 13.5 0.047 0.057 2.679 2.748 init_scf_run 11 5.9 0.000 0.004 2.628 2.628 scf_env_initial_rho_setup 11 6.9 0.000 0.003 2.627 2.628 make_images 4572 14.5 0.119 0.124 2.333 2.407 wfi_extrapolate 11 7.9 0.001 0.001 2.400 2.400 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.067 2.079 mp_waitany 9880 13.7 1.504 1.974 1.504 1.974 qs_ot_get_p 119 10.4 0.001 0.001 1.560 1.654 rs_pw_transfer_RS2PW_140 130 11.5 0.218 0.261 1.093 1.501 make_images_data 4572 15.5 0.038 0.042 1.332 1.429 rs_pw_transfer_PW2RS_140 130 13.9 0.432 0.454 1.328 1.355 hybrid_alltoall_any 4725 16.4 0.071 0.207 1.186 1.311 prepare_preconditioner 11 7.9 0.000 0.000 1.256 1.275 make_preconditioner 11 8.9 0.000 0.000 1.256 1.275 mp_alltoall_d11v 2130 13.8 0.905 1.196 0.905 1.196 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.140 1.151 mp_alltoall_z22v 1201 16.6 1.031 1.124 1.031 1.124 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.048 1.082 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 1.006 1.046 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 1.018 1.020 rs_pw_transfer_PW2RS_50 119 14.3 0.377 0.402 0.986 1.018 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=49.31600000000001, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=15.069, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=10.456, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.207, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.449, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.388, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=17.804000000000006, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=9.09, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=8.148, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.388, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.434, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.504, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.036 0.036 112.959 112.959 qs_mol_dyn_low 1 2.0 0.003 0.003 112.258 112.258 qs_forces 11 3.9 0.001 0.001 112.218 112.218 qs_energies 11 4.9 0.001 0.001 104.485 104.485 scf_env_do_scf 11 5.9 0.001 0.001 91.264 91.264 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 72.860 72.860 velocity_verlet 10 3.0 0.002 0.002 72.696 72.696 rebuild_ks_matrix 107 8.3 0.001 0.001 33.680 33.680 qs_ks_build_kohn_sham_matrix 107 9.3 0.011 0.011 33.680 33.680 qs_rho_update_rho_low 107 7.7 0.000 0.000 31.972 31.972 calculate_rho_elec 107 8.7 0.859 0.859 31.972 31.972 qs_ks_update_qs_env 107 7.6 0.001 0.001 30.258 30.258 grid_collocate_task_list 107 9.7 27.552 27.552 27.552 27.552 sum_up_and_integrate 107 10.3 0.700 0.700 27.049 27.049 integrate_v_rspace 107 11.3 0.082 0.082 26.349 26.349 grid_integrate_task_list 107 12.3 24.517 24.517 24.517 24.517 init_scf_loop 11 6.9 0.000 0.000 18.256 18.256 dbcsr_multiply_generic 1966 12.4 0.147 0.147 18.056 18.056 qs_scf_new_mos 96 7.5 0.001 0.001 17.448 17.448 qs_scf_loop_do_ot 96 8.5 0.001 0.001 17.447 17.447 ot_scf_mini 96 9.5 0.002 0.002 16.198 16.198 prepare_preconditioner 11 7.9 0.000 0.000 14.123 14.123 make_preconditioner 11 8.9 0.000 0.000 14.123 14.123 make_full_inverse_cholesky 11 9.9 0.027 0.027 12.924 12.924 ot_mini 96 10.5 0.001 0.001 10.597 10.597 make_m2s 3932 13.4 0.041 0.041 9.901 9.901 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 7.065 7.065 pw_transfer 1295 11.6 0.061 0.061 5.966 5.966 fft_wrap_pw1pw2 1081 12.6 0.006 0.006 5.774 5.774 qs_ot_get_derivative 96 11.5 0.001 0.001 5.582 5.582 init_scf_run 11 5.9 0.002 0.002 5.506 5.506 scf_env_initial_rho_setup 11 6.9 0.001 0.001 5.505 5.505 dbcsr_make_dense_low 4961 15.5 0.086 0.086 5.301 5.301 make_dense_data 4961 16.5 4.610 4.610 5.203 5.203 ot_diis_step 96 11.5 0.003 0.003 5.013 5.013 make_images 3932 14.4 1.935 1.935 5.011 5.011 fft_wrap_pw1pw2_140 439 13.2 0.518 0.518 4.841 4.841 wfi_extrapolate 11 7.9 0.001 0.001 4.800 4.800 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 4.694 4.694 apply_single 107 13.6 0.000 0.000 4.694 4.694 multiply_cannon 1966 13.4 0.158 0.158 4.634 4.634 dbcsr_make_images_dense 3386 14.7 0.015 0.015 4.608 4.608 cp_fm_cholesky_decompose 22 10.9 4.562 4.562 4.562 4.562 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.286 4.286 multiply_cannon_loop 1966 14.4 0.037 0.037 4.223 4.223 multiply_cannon_multrec 1966 15.4 4.136 4.136 4.185 4.185 cp_fm_cholesky_invert 11 10.9 4.099 4.099 4.099 4.099 qs_env_update_s_mstruct 11 6.9 0.000 0.000 4.061 4.061 dbcsr_complete_redistribute 317 12.2 1.919 1.919 3.874 3.874 qs_create_task_list 11 7.9 0.000 0.000 3.677 3.677 generate_qs_task_list 11 8.9 2.391 2.391 3.677 3.677 dbcsr_copy 1855 11.9 0.220 0.220 3.661 3.661 density_rs2pw 107 9.7 0.004 0.004 3.561 3.561 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 3.445 3.445 dbcsr_copy_into_existing 22 7.9 3.404 3.404 3.404 3.404 copy_dbcsr_to_fm 147 11.2 0.002 0.002 3.095 3.095 fft3d_s 1082 14.6 2.897 2.897 2.902 2.902 build_core_hamiltonian_matrix 11 6.9 0.000 0.000 2.587 2.587 qs_ot_get_p 107 10.4 0.001 0.001 2.507 2.507 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 2.386 2.386 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.021 74.333 74.344 qs_mol_dyn_low 1 2.0 0.003 0.003 74.192 74.196 qs_forces 11 3.9 0.002 0.002 74.151 74.151 qs_energies 11 4.9 0.001 0.001 69.228 69.231 scf_env_do_scf 11 5.9 0.000 0.002 64.088 64.089 scf_env_do_scf_inner_loop 96 6.5 0.003 0.017 59.285 59.286 velocity_verlet 10 3.0 0.002 0.003 44.209 44.210 rebuild_ks_matrix 107 8.3 0.000 0.001 31.617 31.662 qs_ks_build_kohn_sham_matrix 107 9.3 0.017 0.019 31.617 31.661 qs_ks_update_qs_env 107 7.6 0.001 0.001 27.802 27.844 sum_up_and_integrate 107 10.3 0.035 0.039 27.260 27.277 integrate_v_rspace 107 11.3 0.004 0.005 27.225 27.241 qs_rho_update_rho_low 107 7.7 0.001 0.001 26.364 26.371 calculate_rho_elec 107 8.7 0.028 0.030 26.363 26.371 grid_integrate_task_list 107 12.3 22.839 23.205 22.839 23.205 grid_collocate_task_list 107 9.7 21.965 22.482 21.965 22.482 dbcsr_multiply_generic 1966 12.4 0.071 0.077 13.906 14.037 qs_scf_new_mos 96 7.5 0.001 0.001 10.926 10.973 qs_scf_loop_do_ot 96 8.5 0.001 0.001 10.926 10.972 multiply_cannon 1966 13.4 0.153 0.163 10.273 10.639 ot_scf_mini 96 9.5 0.002 0.002 10.334 10.382 multiply_cannon_loop 1966 14.4 0.103 0.108 9.713 9.966 mp_waitall_1 136719 16.5 7.978 8.392 7.978 8.392 multiply_cannon_metrocomm3 15728 15.4 0.045 0.047 6.220 6.522 ot_mini 96 10.5 0.001 0.001 5.994 6.044 rs_pw_transfer 878 11.9 0.012 0.014 4.433 5.034 init_scf_loop 11 6.9 0.000 0.000 4.786 4.786 density_rs2pw 107 9.7 0.005 0.006 3.792 4.375 init_scf_run 11 5.9 0.000 0.004 4.046 4.046 scf_env_initial_rho_setup 11 6.9 0.000 0.003 4.046 4.046 pw_transfer 1295 11.6 0.088 0.095 3.963 4.020 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.940 3.946 fft_wrap_pw1pw2 1081 12.6 0.009 0.010 3.810 3.866 wfi_extrapolate 11 7.9 0.001 0.001 3.687 3.687 potential_pw2rs 107 12.3 0.006 0.007 3.471 3.482 fft_wrap_pw1pw2_140 439 13.2 0.657 0.704 3.302 3.394 multiply_cannon_multrec 15728 15.4 3.110 3.214 3.122 3.227 qs_ot_get_derivative 96 11.5 0.001 0.001 3.093 3.140 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 2.953 3.033 apply_single 107 13.6 0.000 0.001 2.952 3.033 ot_diis_step 96 11.5 0.003 0.003 2.886 2.886 make_m2s 3932 13.4 0.042 0.052 2.472 2.518 fft3d_ps 1081 14.6 1.293 1.468 2.419 2.516 mp_waitany 8968 13.7 1.595 2.256 1.595 2.256 make_images 3932 14.4 0.107 0.112 2.161 2.211 mp_alltoall_d11v 1998 13.7 1.156 1.780 1.156 1.780 rs_pw_transfer_RS2PW_140 118 11.5 0.173 0.206 1.140 1.743 qs_ot_get_p 107 10.4 0.001 0.001 1.465 1.538 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=47.58200000000001, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=27.552, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=24.517, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="make_dense_data", label="make_dense_data", y=4.61, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=4.562, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.136, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=16.845999999999997, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=21.965, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.839, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.11, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=1.595, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=7.978, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.205 0.205 105.576 105.576 qs_energies 1 2.0 0.000 0.000 104.713 104.713 scf_env_do_scf 1 3.0 0.000 0.000 103.549 103.549 qs_ks_update_qs_env 8 5.0 0.000 0.000 98.557 98.557 rebuild_ks_matrix 7 6.0 0.000 0.000 98.501 98.501 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.001 98.501 98.501 hfx_ks_matrix 7 8.0 0.000 0.000 89.707 89.707 integrate_four_center 7 9.0 1.633 1.633 89.670 89.670 integrate_four_center_main 7 10.0 0.568 0.568 81.721 81.721 integrate_four_center_bin 448 11.0 81.153 81.153 81.153 81.153 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 56.688 56.688 init_scf_loop 1 4.0 0.000 0.000 46.851 46.851 integrate_four_center_load 7 10.0 0.006 0.006 6.088 6.088 hfx_load_balance 1 11.0 0.001 0.001 6.082 6.082 qs_vxc_create 14 8.0 0.000 0.000 3.091 3.091 xc_vxc_pw_create 14 9.0 0.148 0.148 3.091 3.091 hfx_load_balance_count 1 12.0 3.033 3.033 3.033 3.033 hfx_load_balance_bin 1 12.0 3.033 3.033 3.033 3.033 calculate_rho_elec 15 7.4 0.118 0.118 2.469 2.469 prepare_preconditioner 1 5.0 0.000 0.000 2.352 2.352 make_preconditioner 1 6.0 0.000 0.000 2.352 2.352 xc_rho_set_and_dset_create 14 10.0 0.102 0.102 2.324 2.324 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.221 0.241 97.200 97.211 qs_energies 1 2.0 0.000 0.000 96.769 96.776 scf_env_do_scf 1 3.0 0.000 0.000 96.418 96.419 qs_ks_update_qs_env 8 5.0 0.000 0.000 94.310 94.310 rebuild_ks_matrix 7 6.0 0.000 0.000 94.302 94.302 qs_ks_build_kohn_sham_matrix 7 7.0 0.001 0.002 94.302 94.302 hfx_ks_matrix 7 8.0 0.000 0.000 88.547 88.548 integrate_four_center 7 9.0 0.053 0.333 88.538 88.539 integrate_four_center_main 7 10.0 0.003 0.003 80.150 81.496 integrate_four_center_bin 448 11.0 80.147 81.493 80.147 81.493 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 53.821 53.821 init_scf_loop 1 4.0 0.000 0.000 42.596 42.596 integrate_four_center_load 7 10.0 0.000 0.000 5.676 5.677 hfx_load_balance 1 11.0 0.001 0.001 5.676 5.677 mp_sync 56 11.2 2.037 3.243 2.037 3.243 hfx_load_balance_count 1 12.0 2.777 2.847 2.777 2.847 hfx_load_balance_bin 1 12.0 2.778 2.824 2.778 2.824 qs_vxc_create 14 8.0 0.000 0.000 2.560 2.560 xc_vxc_pw_create 14 9.0 0.007 0.008 2.559 2.559 xc_rho_set_and_dset_create 14 10.0 0.010 0.011 1.993 2.098 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=15.950999999999993, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=81.153, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=3.033, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=3.033, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=1.633, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=0.568, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="CP2K", label="CP2K", y=0.205, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=9.183999999999983, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=80.147, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=2.777, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=2.778, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.053, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.003, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="CP2K", label="CP2K", y=0.221, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=2.037, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.014 79.644 79.644 qs_energies 1 2.0 0.000 0.000 79.229 79.229 mp2_main 1 3.0 0.000 0.000 76.451 76.451 mp2_gpw_main 1 4.0 0.000 0.000 76.352 76.352 rpa_ri_compute_en 1 5.0 0.000 0.000 72.672 72.672 rpa_num_int 1 6.0 0.001 0.001 72.666 72.666 compute_mat_P_omega 1 7.0 0.003 0.003 62.769 62.769 compute_mat_P_omega_contract 10 8.0 8.878 8.878 62.546 62.546 dbt_total 2336 9.6 0.014 0.014 48.699 48.699 dbt_contract 787 11.0 0.043 0.043 41.616 41.616 dbt_tas_total 1149 12.2 0.234 0.234 40.522 40.522 dbt_tas_multiply 807 12.1 0.002 0.002 39.165 39.165 dbt_tas_dbm 807 14.1 0.004 0.004 32.691 32.691 dbm_multiply 807 16.1 32.682 32.682 32.682 32.682 dbt_tas_mm_1N 524 15.1 0.001 0.001 25.428 25.428 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 23.956 23.956 compute_mat_P_omega_calc_M_occ 250 9.0 8.894 8.894 17.135 17.135 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 6.826 6.826 dbt_tas_mm_2 251 15.0 0.001 0.001 5.824 5.824 dbt_copy 1103 10.7 0.060 0.060 5.707 5.707 compute_QP_energies 1 7.0 0.000 0.000 4.979 4.979 compute_self_energy_cubic_gw 1 8.0 0.053 0.053 4.978 4.978 contract_cubic_gw 21 9.0 0.000 0.000 3.982 3.982 dbt_tas_reserve_blocks_index 3261 14.3 0.489 0.489 3.758 3.758 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 3.673 3.673 dbm_reserve_blocks 3628 15.3 3.336 3.336 3.336 3.336 dbt_reserve_blocks_index 2280 13.1 0.057 0.057 2.878 2.878 dbt_reserve_blocks_index_array 2222 12.2 0.011 0.011 2.835 2.835 scf_env_do_scf 1 3.0 0.000 0.000 2.671 2.671 scf_env_do_scf_inner_loop 17 4.0 0.002 0.002 2.671 2.671 dbt_crop 1042 12.0 1.422 1.422 2.326 2.326 convert_to_new_pgrid 2421 14.1 0.058 0.058 2.273 2.273 dbt_tas_copy 574 11.4 1.322 1.322 2.228 2.228 dbm_copy 1614 15.1 2.215 2.215 2.215 2.215 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 2.057 2.057 dbt_tas_reshape 367 15.0 0.007 0.007 2.048 2.048 compute_W_cubic_GW 10 7.0 0.011 0.011 2.037 2.037 dbt_reshape 278 11.9 1.079 1.079 1.972 1.972 get_2c_integrals 1 6.0 0.000 0.000 1.761 1.761 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.627 1.627 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.027 36.518 36.532 qs_energies 1 2.0 0.000 0.000 36.387 36.390 mp2_main 1 3.0 0.000 0.000 35.356 35.358 mp2_gpw_main 1 4.0 0.000 0.000 35.319 35.322 rpa_ri_compute_en 1 5.0 0.000 0.000 34.002 34.005 rpa_num_int 1 6.0 0.000 0.002 34.001 34.004 dbt_total 2336 9.6 0.016 0.017 29.570 29.592 compute_mat_P_omega 1 7.0 0.001 0.005 28.416 28.418 compute_mat_P_omega_contract 10 8.0 0.420 0.432 28.230 28.242 dbt_contract 787 11.0 0.041 0.041 21.822 21.829 dbt_tas_total 1149 12.2 0.084 0.092 19.399 19.425 dbt_tas_multiply 807 12.1 0.002 0.002 19.220 19.226 dbt_tas_dbm 807 14.1 0.003 0.003 13.214 13.223 dbm_multiply 807 16.1 10.594 11.345 10.594 11.345 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 9.057 9.058 compute_mat_P_omega_calc_M_occ 250 9.0 0.405 0.420 7.694 7.696 dbt_copy 1149 10.8 0.014 0.014 6.501 6.729 mp_sync 8688 11.6 5.517 6.608 5.517 6.608 dbt_tas_mm_2 251 15.0 0.001 0.002 6.460 6.463 dbt_reshape 1136 11.8 2.715 2.935 6.068 6.284 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 5.290 5.290 dbt_tas_mm_1N 524 15.1 0.001 0.002 4.649 4.989 compute_QP_energies 1 7.0 0.000 0.000 3.584 3.585 compute_self_energy_cubic_gw 1 8.0 0.003 0.003 3.582 3.583 mp_waitall_2 3812 15.3 2.490 2.695 2.490 2.695 dbt_communicate_buffer 1136 12.8 0.053 0.057 2.482 2.613 contract_cubic_gw 21 9.0 0.000 0.000 2.552 2.552 dbt_reserve_blocks_index 2887 13.1 0.070 0.076 1.828 2.023 dbt_reserve_blocks_index_array 2829 12.2 0.009 0.010 1.818 2.013 dbt_tas_reserve_blocks_index 3347 14.5 0.438 0.479 1.809 2.004 dbt_crop 1042 12.0 0.925 1.006 1.502 1.659 dbm_reserve_blocks 3752 15.4 1.467 1.627 1.467 1.627 mp_sum_l 6165 12.9 1.268 1.333 1.268 1.333 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 1.315 1.315 dbt_tas_replicate 405 14.1 0.548 0.718 1.142 1.203 compute_mat_P_omega_copy_M_vir 250 9.0 0.001 0.001 1.125 1.133 parallel_gemm_fm 105 8.4 0.000 0.000 1.056 1.066 parallel_gemm_fm_cosma 105 9.4 1.056 1.066 1.056 1.066 compute_mat_P_omega_copy_M_occ 250 9.0 0.001 0.001 1.058 1.060 scf_env_do_scf 1 3.0 0.000 0.000 0.985 0.985 scf_env_do_scf_inner_loop 17 4.0 0.001 0.002 0.984 0.985 convert_to_new_pgrid 2421 14.1 0.028 0.032 0.842 0.962 mp_max_i 2002 9.8 0.761 0.957 0.761 0.957 dbm_copy 1608 15.1 0.808 0.930 0.808 0.930 compute_W_cubic_GW 10 7.0 0.001 0.001 0.793 0.798 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 0.757 0.757 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=22.56000000000001, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=32.682, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=8.894, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=8.878, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=3.336, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbm_copy", label="dbm_copy", y=2.215, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=1.079, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=12.102000000000004, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=10.594, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_calc_M_occ", label="compute_mat_P_omega_calc_M_occ", y=0.405, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="compute_mat_P_omega_contract", label="compute_mat_P_omega_contract", y=0.42, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=1.467, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbm_copy", label="dbm_copy", y=0.808, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=2.715, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=5.517, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.49, yerr=0.0 Running RI-HFX_H2O-32.inp with 1 threads and 32 ranks... done. Running RI-HFX_H2O-32.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-HFX_H2O-32_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.019 0.019 259.633 259.633 qs_forces 1 2.0 0.000 0.000 259.026 259.026 rebuild_ks_matrix 7 6.6 0.000 0.000 257.552 257.552 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.001 257.552 257.552 hfx_ks_matrix 7 8.6 0.000 0.000 255.661 255.661 hfx_ri_update_ks 7 9.6 0.000 0.000 216.453 216.453 hfx_ri_update_ks_Pmat 7 10.6 31.379 31.379 216.449 216.449 dbt_total 809 11.0 0.006 0.006 201.519 201.519 qs_energies 1 3.0 0.000 0.000 193.454 193.454 scf_env_do_scf 1 4.0 0.000 0.000 193.102 193.102 qs_ks_update_qs_env 8 6.0 0.000 0.000 192.032 192.032 dbt_contract 207 12.4 0.036 0.036 182.889 182.889 dbt_tas_total 343 13.7 1.435 1.435 181.131 181.131 dbt_tas_multiply 216 13.5 0.001 0.001 177.660 177.660 dbt_tas_dbm 216 15.5 0.001 0.001 166.082 166.082 dbm_multiply 216 17.5 166.079 166.079 166.079 166.079 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 160.723 160.723 dbt_tas_mm_2 91 16.5 0.001 0.001 152.001 152.001 scf_env_do_scf_inner_loop 6 5.0 0.001 0.001 119.744 119.744 init_scf_loop 2 5.0 0.000 0.000 73.356 73.356 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 65.524 65.524 hfx_ri_update_forces 1 7.0 1.587 1.587 39.204 39.204 hfx_ri_forces_Pmat_3c 1 8.0 4.756 4.756 22.246 22.246 dbt_copy 409 11.7 0.034 0.034 15.152 15.152 precalc_derivatives 1 8.0 2.157 2.157 13.120 13.120 dbt_reshape 132 13.2 6.410 6.410 10.439 10.439 dbt_tas_mm_3T 77 17.1 0.000 0.000 9.844 9.844 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 9.183 9.183 dbt_tas_reserve_blocks_index 1255 15.4 1.048 1.048 7.840 7.840 build_3c_derivatives 3 9.0 2.422 2.422 7.331 7.331 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 7.198 7.198 dbm_reserve_blocks 1397 16.3 7.008 7.008 7.008 7.008 dbt_reserve_blocks_index 818 14.4 0.098 0.098 6.174 6.174 dbt_reserve_blocks_index_array 795 13.4 0.008 0.008 6.077 6.077 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-HFX_H2O-32_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.025 52.459 52.468 qs_forces 1 2.0 0.000 0.000 52.192 52.193 rebuild_ks_matrix 7 6.6 0.000 0.000 51.150 51.151 qs_ks_build_kohn_sham_matrix 7 7.6 0.001 0.002 51.150 51.151 hfx_ks_matrix 7 8.6 0.000 0.000 50.021 50.028 dbt_total 809 11.0 0.006 0.006 44.818 44.839 dbt_contract 207 12.4 0.026 0.027 34.428 34.440 hfx_ri_update_ks 7 9.6 0.000 0.000 30.361 30.361 hfx_ri_update_ks_Pmat 7 10.6 1.234 1.283 30.357 30.358 dbt_tas_total 343 13.7 0.094 0.243 30.330 30.342 dbt_tas_multiply 216 13.5 0.001 0.001 29.684 29.687 qs_energies 1 3.0 0.000 0.000 28.712 28.713 scf_env_do_scf 1 4.0 0.000 0.001 28.562 28.563 qs_ks_update_qs_env 8 6.0 0.000 0.000 27.680 27.681 qs_ks_update_qs_env_forces 1 3.0 0.000 0.000 23.470 23.471 dbt_tas_dbm 216 15.5 0.001 0.001 22.723 22.737 dbm_multiply 216 17.5 20.790 21.279 20.790 21.279 hfx_ri_update_forces 1 7.0 0.059 0.063 19.660 19.666 scf_env_do_scf_inner_loop 6 5.0 0.000 0.001 15.979 15.980 hfx_ri_forces_Pmat_3c 1 8.0 0.162 0.174 14.737 14.737 hfx_ri_update_ks_Pmat_KS 63 11.6 0.001 0.001 14.357 14.369 init_scf_loop 2 5.0 0.000 0.000 12.582 12.582 dbt_tas_mm_2 91 16.5 0.001 0.001 10.613 10.626 dbt_copy 497 12.3 0.012 0.014 8.866 9.030 dbt_reshape 365 13.6 3.656 3.725 6.847 6.962 dbt_tas_mm_3T 77 17.1 0.000 0.000 6.179 6.523 mp_sync 2769 12.9 5.342 6.194 5.342 6.194 hfx_ri_update_ks_Pmat_Px3C 63 11.6 0.000 0.000 5.621 5.623 dbt_tas_mm_3N 37 15.4 0.000 0.000 4.377 4.551 precalc_derivatives 1 8.0 0.079 0.084 3.704 3.704 hfx_ri_pre_scf_Pmat 1 12.0 0.000 0.000 3.430 3.430 dbt_tas_reserve_blocks_index 1380 15.8 0.905 0.942 3.216 3.419 mp_waitall_2 1234 16.4 2.955 3.075 2.955 3.075 dbt_reserve_blocks_index 1051 14.7 0.116 0.127 2.688 2.841 dbt_reserve_blocks_index_array 1028 13.8 0.006 0.006 2.661 2.819 dbm_reserve_blocks 1529 16.7 2.532 2.726 2.532 2.726 dbt_crop 372 13.7 1.764 1.797 2.582 2.651 dbt_communicate_buffer 365 14.6 0.013 0.013 2.107 2.196 build_3c_derivatives 3 9.0 0.222 0.241 2.016 2.024 hfx_ri_pre_scf_Pmat_RIx3C 9 13.0 0.000 0.000 1.829 1.833 dbt_tas_replicate 149 15.4 0.683 0.715 1.729 1.783 hfx_ri_update_ks_Pmat_copy_2 63 11.6 0.000 0.000 1.771 1.771 mp_sum_l 6385 13.7 1.447 1.590 1.447 1.590 convert_to_new_pgrid 648 15.5 0.032 0.064 1.389 1.558 dbm_copy 452 16.3 1.235 1.413 1.235 1.413 dbt_tas_copy 132 12.5 0.641 0.675 1.230 1.308 ------------------------------------------------------------------------------- Plot: name="RI-HFX_H2O-32_timings_32omp", title="Timings of RI-HFX_H2O-32 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="rest", label="rest", y=44.000999999999976, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_multiply", label="dbm_multiply", y=166.079, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=31.379, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=7.008, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="dbt_reshape", label="dbt_reshape", y=6.41, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=4.756, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="RI-HFX_H2O-32_timings_32mpi", title="Timings of RI-HFX_H2O-32 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="rest", label="rest", y=15.788000000000004, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_multiply", label="dbm_multiply", y=20.79, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_update_ks_Pmat", label="hfx_ri_update_ks_Pmat", y=1.234, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbm_reserve_blocks", label="dbm_reserve_blocks", y=2.532, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="dbt_reshape", label="dbt_reshape", y=3.656, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="hfx_ri_forces_Pmat_3c", label="hfx_ri_forces_Pmat_3c", y=0.162, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_sync", label="mp_sync", y=5.342, yerr=0.0 PlotPoint: plot="RI-HFX_H2O-32_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=2.955, yerr=0.0 Running RI-MP2_ammonia.inp with 1 threads and 32 ranks... done. Running RI-MP2_ammonia.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/RI-MP2_ammonia_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.016 190.842 190.842 qs_energies 1 2.0 0.000 0.000 190.645 190.645 mp2_main 1 3.0 0.000 0.000 185.908 185.908 mp2_gpw_main 1 4.0 0.001 0.001 185.512 185.512 mp2_ri_gpw_compute_in 1 5.0 0.395 0.395 139.442 139.442 mp2_ri_gpw_compute_in_loop 1 6.0 0.009 0.009 130.702 130.702 mp2_eri_3c_integrate_gpw 2656 7.0 0.014 0.014 98.355 98.355 integrate_v_rspace 2666 8.0 0.665 0.665 85.207 85.207 grid_integrate_task_list 2666 9.0 82.581 82.581 82.581 82.581 mp2_ri_gpw_compute_en 1 5.0 0.093 0.093 46.044 46.044 mp2_ri_gpw_compute_en_RI_loop 1 6.0 10.227 10.227 44.136 44.136 mp2_ri_gpw_compute_en_expansio 2080 7.0 2.050 2.050 26.131 26.131 local_gemm 2080 8.0 24.081 24.081 24.081 24.081 dbcsr_multiply_generic 5322 8.0 0.192 0.192 21.747 21.747 ao_to_mo_and_store_B_mult_1 2656 7.0 0.009 0.009 21.726 21.726 pw_transfer 63872 10.6 1.044 1.044 11.817 11.817 calculate_wavefunction 2656 8.0 8.033 8.033 11.803 11.803 multiply_cannon 5322 9.0 0.495 0.495 11.225 11.225 fft_wrap_pw1pw2 53228 11.4 0.114 0.114 10.560 10.560 ao_to_mo_and_store_B_E_Ex_1 2656 7.0 2.405 2.405 10.503 10.503 multiply_cannon_loop 5322 10.0 0.126 0.126 9.715 9.715 copy_dbcsr_to_fm 2679 8.0 0.026 0.026 8.646 8.646 get_2c_integrals 1 6.0 0.000 0.000 8.344 8.344 make_m2s 10644 9.0 0.063 0.063 8.307 8.307 multiply_cannon_multrec 5322 11.0 8.106 8.106 8.147 8.147 make_images 10644 10.0 3.320 3.320 7.974 7.974 fft_wrap_pw1pw2_20 21271 12.4 0.490 0.490 7.590 7.590 compute_2c_integrals 1 7.0 0.006 0.006 7.540 7.540 compute_2c_integrals_loop_lm 1 8.0 0.011 0.011 7.526 7.526 mp2_eri_2c_integrate_gpw 1 9.0 0.798 0.798 7.515 7.515 dbcsr_complete_redistribute 2689 9.0 1.204 1.204 6.725 6.725 fft3d_s 53229 13.4 6.471 6.471 6.504 6.504 dbcsr_finalize 10708 9.5 0.169 0.169 6.042 6.042 mp2_ri_gpw_compute_en_ener 2080 7.0 5.746 5.746 5.746 5.746 dbcsr_merge_all 8011 10.3 3.755 3.755 5.407 5.407 scf_env_do_scf 1 3.0 0.000 0.000 4.328 4.328 scf_env_do_scf_inner_loop 10 4.0 0.001 0.001 4.328 4.328 potential_pw2rs 5322 10.0 0.149 0.149 3.863 3.863 ------------------------------------------------------------------------------- From /workspace/artifacts/RI-MP2_ammonia_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.027 39.660 39.671 qs_energies 1 2.0 0.000 0.001 39.524 39.524 mp2_main 1 3.0 0.000 0.001 37.536 37.537 mp2_gpw_main 1 4.0 0.001 0.001 37.441 37.441 mp2_ri_gpw_compute_en 1 5.0 0.280 0.291 19.918 20.154 mp2_ri_gpw_compute_en_RI_loop 1 6.0 3.835 4.025 18.679 18.708 mp2_ri_gpw_compute_in 1 5.0 0.050 0.052 17.431 17.733 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.001 16.151 16.437 mp2_eri_3c_integrate_gpw 83 7.0 0.001 0.001 13.996 14.194 integrate_v_rspace 93 8.1 0.104 0.121 13.866 14.069 grid_integrate_task_list 93 9.1 13.553 13.764 13.553 13.764 mp2_ri_gpw_compute_en_expansio 65 7.0 0.151 0.162 11.280 11.420 local_gemm 65 8.0 11.128 11.259 11.128 11.259 mp2_ri_gpw_compute_en_comm 30 7.0 0.137 0.160 3.145 3.536 mp_sendrecv_dm3 1860 8.0 2.224 2.656 2.224 2.656 dbcsr_multiply_generic 176 8.0 0.008 0.009 1.827 2.102 ao_to_mo_and_store_B_mult_1 83 7.0 0.001 0.001 1.811 2.088 scf_env_do_scf 1 3.0 0.000 0.000 1.853 1.853 scf_env_do_scf_inner_loop 10 4.0 0.000 0.002 1.852 1.853 get_2c_integrals 1 6.0 0.005 0.016 1.209 1.237 multiply_cannon 176 9.0 0.016 0.018 1.079 1.149 multiply_cannon_loop 176 10.0 0.002 0.002 1.022 1.090 fill_local_i_aL 1920 8.0 0.835 0.930 0.835 0.930 multiply_cannon_multrec 246 11.0 0.879 0.916 0.884 0.922 make_m2s 352 9.0 0.003 0.003 0.711 0.912 make_images 352 10.0 0.052 0.054 0.699 0.899 compute_2c_integrals 1 7.0 0.003 0.005 0.873 0.891 qs_scf_new_mos 10 5.0 0.000 0.000 0.852 0.859 compute_2c_integrals_loop_lm 1 8.0 0.002 0.004 0.782 0.804 mp2_eri_2c_integrate_gpw 1 9.0 0.200 0.210 0.780 0.803 ------------------------------------------------------------------------------- Plot: name="RI-MP2_ammonia_timings_32omp", title="Timings of RI-MP2_ammonia with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="rest", label="rest", y=57.81400000000002, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=82.581, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="local_gemm", label="local_gemm", y=24.081, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=10.227, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.106, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="calculate_wavefunction", label="calculate_wavefunction", y=8.033, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32omp", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=0.0, yerr=0.0 Plot: name="RI-MP2_ammonia_timings_32mpi", title="Timings of RI-MP2_ammonia with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="rest", label="rest", y=8.040999999999993, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.553, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="local_gemm", label="local_gemm", y=11.128, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp2_ri_gpw_compute_en_RI_loop", label="mp2_ri_gpw_compute_en_RI_loop", y=3.835, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.879, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="calculate_wavefunction", label="calculate_wavefunction", y=0.0, yerr=0.0 PlotPoint: plot="RI-MP2_ammonia_timings_32mpi", name="mp_sendrecv_dm3", label="mp_sendrecv_dm3", y=2.224, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.094 0.094 137.147 137.147 qs_energies 1 2.0 0.000 0.000 135.680 135.680 scf_env_do_scf 1 3.0 0.000 0.000 128.704 128.704 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 128.704 128.704 qs_ks_update_qs_env 15 5.0 0.000 0.000 51.434 51.434 rebuild_ks_matrix 15 6.0 0.000 0.000 50.806 50.806 qs_ks_build_kohn_sham_matrix 15 7.0 0.002 0.002 50.806 50.806 qs_scf_new_mos 15 5.0 0.000 0.000 44.349 44.349 eigensolver 15 6.0 0.002 0.002 34.523 34.523 qs_vxc_create 15 8.0 0.047 0.047 34.376 34.376 calculate_dispersion_nonloc 15 9.0 7.079 7.079 29.569 29.569 cp_fm_diag_elpa 15 7.0 0.000 0.000 23.518 23.518 cp_fm_diag_elpa_base 15 8.0 20.600 20.600 23.518 23.518 pw_transfer 1191 10.0 0.065 0.065 22.828 22.828 fft_wrap_pw1pw2 1086 11.0 0.009 0.009 22.630 22.630 qs_rho_update_rho_low 16 5.0 0.000 0.000 20.198 20.198 calculate_rho_elec 16 6.0 0.249 0.249 20.198 20.198 grid_collocate_task_list 16 7.0 18.728 18.728 18.728 18.728 fft_wrap_pw1pw2_150 765 12.0 4.117 4.117 16.136 16.136 sum_up_and_integrate 15 8.0 0.323 0.323 14.796 14.796 integrate_v_rspace 15 9.0 0.040 0.040 14.473 14.473 grid_integrate_task_list 15 10.0 13.866 13.866 13.866 13.866 gspace_mixing 14 5.0 0.193 0.193 13.547 13.547 broyden_mixing 14 6.0 12.859 12.859 12.860 12.860 fft3d_s 1087 13.0 8.719 8.719 8.726 8.726 cp_fm_cholesky_restore 45 7.0 8.154 8.154 8.154 8.154 pw_scatter_s 585 13.1 7.614 7.614 7.614 7.614 copy_dbcsr_to_fm 16 5.9 0.001 0.001 6.954 6.954 dbcsr_complete_redistribute 46 8.3 2.904 2.904 6.708 6.708 fft_wrap_pw1pw2_200 197 12.3 0.879 0.879 6.313 6.313 cp_fm_upper_to_full 30 8.0 5.767 5.767 5.767 5.767 vdW_energy 15 10.0 4.862 4.862 4.862 4.862 xc_vxc_pw_create 15 9.0 0.239 0.239 4.760 4.760 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.535 3.535 calculate_dm_sparse 15 6.0 0.022 0.022 3.230 3.230 cp_dbcsr_plus_fm_fm_t_native 15 7.0 0.000 0.000 3.112 3.112 init_scf_run 1 3.0 0.000 0.000 2.978 2.978 xc_pw_derive 90 11.0 0.001 0.001 2.952 2.952 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.028 61.514 61.525 qs_energies 1 2.0 0.000 0.000 61.156 61.161 scf_env_do_scf 1 3.0 0.000 0.001 56.981 56.981 scf_env_do_scf_inner_loop 15 4.0 0.001 0.004 56.981 56.981 qs_ks_update_qs_env 15 5.0 0.000 0.000 25.460 25.471 rebuild_ks_matrix 15 6.0 0.000 0.000 25.406 25.418 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 25.406 25.418 qs_rho_update_rho_low 16 5.0 0.000 0.000 18.859 18.863 calculate_rho_elec 16 6.0 0.007 0.007 18.859 18.863 grid_collocate_task_list 16 7.0 17.478 17.723 17.478 17.723 sum_up_and_integrate 15 8.0 0.017 0.022 14.148 14.187 integrate_v_rspace 15 9.0 0.001 0.001 14.131 14.175 grid_integrate_task_list 15 10.0 13.323 13.430 13.323 13.430 qs_scf_new_mos 15 5.0 0.000 0.000 13.118 13.187 eigensolver 15 6.0 0.001 0.002 12.054 12.090 qs_vxc_create 15 8.0 0.001 0.001 10.895 10.903 pw_transfer 1191 10.0 0.101 0.115 8.821 8.864 calculate_dispersion_nonloc 15 9.0 0.911 0.933 8.802 8.820 fft_wrap_pw1pw2 1086 11.0 0.014 0.016 8.634 8.679 cp_fm_diag_elpa 15 7.0 0.000 0.000 8.475 8.479 cp_fm_diag_elpa_base 15 8.0 8.337 8.350 8.472 8.475 fft3d_ps 1086 13.0 3.140 3.333 5.699 5.997 fft_wrap_pw1pw2_150 765 12.0 0.816 0.857 5.729 5.750 cp_fm_cholesky_restore 45 7.0 3.424 3.477 3.424 3.477 fft_wrap_pw1pw2_200 197 12.3 0.482 0.537 2.785 2.820 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 2.654 2.654 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.362 2.553 mp_alltoall_z22v 1086 15.0 1.897 2.441 1.897 2.441 xc_vxc_pw_create 15 9.0 0.016 0.020 2.093 2.128 build_core_ppnl 1 5.0 1.467 1.587 1.467 1.587 yz_to_x 501 13.9 0.250 0.282 1.159 1.540 x_to_yz 585 14.1 0.381 0.402 1.369 1.522 xc_pw_derive 90 11.0 0.002 0.002 1.439 1.520 rs_pw_transfer 158 9.4 0.002 0.002 1.084 1.383 vdW_energy 15 10.0 1.321 1.369 1.321 1.369 density_rs2pw 16 7.0 0.001 0.001 1.145 1.297 init_scf_run 1 3.0 0.000 0.001 1.289 1.290 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=54.221000000000004, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=20.6, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=18.728, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.866, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="broyden_mixing", label="broyden_mixing", y=12.859, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.719, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=8.154, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=15.811999999999998, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=8.337, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=17.478, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=13.323, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="broyden_mixing", label="broyden_mixing", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=3.424, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=3.14, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.078 0.078 295.327 295.327 qs_energies 1 2.0 0.000 0.000 295.187 295.187 ls_scf 1 3.0 0.000 0.000 293.913 293.913 ls_scf_main 1 4.0 0.002 0.002 284.781 284.781 density_matrix_trs4 11 5.0 0.013 0.013 184.631 184.631 ls_scf_dm_to_ks 11 5.0 0.000 0.000 93.784 93.784 matrix_ls_to_qs 11 6.0 0.000 0.000 90.101 90.101 arnoldi_extremal 12 6.1 0.000 0.000 88.402 88.402 arnoldi_normal_ev 12 7.1 0.043 0.043 88.402 88.402 build_subspace 23 8.1 0.084 0.084 86.850 86.850 dbcsr_matrix_vector_mult 652 9.0 0.254 0.254 86.385 86.385 dbcsr_matrix_vector_mult_local 652 10.0 84.745 84.745 84.754 84.754 dbcsr_multiply_generic 185 6.1 0.891 0.891 80.115 80.115 dbcsr_complete_redistribute 23 7.5 34.641 34.641 48.266 48.266 dbcsr_copy_into_existing 11 7.0 46.247 46.247 46.247 46.247 multiply_cannon 185 7.1 0.381 0.381 46.014 46.014 matrix_decluster 11 7.0 0.000 0.000 43.829 43.829 multiply_cannon_loop 185 8.1 0.236 0.236 33.117 33.117 make_m2s 370 7.1 0.040 0.040 29.097 29.097 make_images 370 8.1 12.886 12.886 26.592 26.592 multiply_cannon_multrec 185 9.1 23.921 23.921 23.966 23.966 dbcsr_finalize 646 7.5 0.228 0.228 17.711 17.711 dbcsr_merge_all 597 8.5 2.529 2.529 16.422 16.422 setup_rec_index_2d 370 8.1 12.405 12.405 12.405 12.405 tree_to_linear_d 110 9.4 12.330 12.330 12.330 12.330 dbcsr_sort_indices 1103 9.9 11.717 11.717 11.717 11.717 quick_finalize 395 10.0 0.406 0.406 10.093 10.093 dbcsr_special_finalize 370 9.1 0.002 0.002 9.314 9.314 calculate_norms 370 9.1 8.916 8.916 8.916 8.916 ls_scf_init_scf 1 4.0 0.000 0.000 8.438 8.438 ls_scf_init_matrix_S 1 5.0 0.000 0.000 8.106 8.106 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 7.449 7.449 dbcsr_dot_sd 144 6.3 7.266 7.266 7.267 7.267 matrix_qs_to_ls 12 5.1 0.000 0.000 6.498 6.498 matrix_cluster 12 6.1 0.000 0.000 6.498 6.498 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.008 0.021 71.325 71.336 qs_energies 1 2.0 0.000 0.000 71.192 71.193 ls_scf 1 3.0 0.000 0.000 71.139 71.140 ls_scf_main 1 4.0 0.001 0.008 68.390 68.391 density_matrix_trs4 11 5.0 0.007 0.019 65.819 65.874 dbcsr_multiply_generic 185 6.1 0.060 0.072 62.649 62.928 multiply_cannon 185 7.1 0.036 0.038 51.775 52.155 multiply_cannon_loop 185 8.1 0.124 0.131 49.244 49.944 multiply_cannon_multrec 1480 9.1 29.737 31.627 30.007 31.896 mp_waitall_1 11936 10.3 16.917 18.963 16.917 18.963 multiply_cannon_metrocomm3 1480 9.1 0.017 0.019 12.819 15.137 make_m2s 370 7.1 0.036 0.038 7.149 7.192 make_images 370 8.1 0.636 0.668 7.017 7.062 calculate_norms 2960 9.1 5.139 5.528 5.139 5.528 make_images_data 370 9.1 0.011 0.013 3.293 3.443 mp_sum_l 1199 5.3 2.799 3.423 2.799 3.423 hybrid_alltoall_any 393 9.9 0.254 1.410 2.902 3.092 dbcsr_multiply_generic_mpsum_f 137 7.1 0.000 0.000 1.961 2.543 arnoldi_extremal 12 6.1 0.000 0.001 2.411 2.422 arnoldi_normal_ev 12 7.1 0.001 0.008 2.411 2.421 build_subspace 23 8.1 0.021 0.026 2.319 2.320 ls_scf_dm_to_ks 11 5.0 0.000 0.000 2.216 2.314 dbcsr_complete_redistribute 23 7.5 1.203 1.309 2.002 2.128 ls_scf_init_scf 1 4.0 0.000 0.000 2.116 2.116 ls_scf_init_matrix_S 1 5.0 0.000 0.000 2.084 2.090 matrix_ls_to_qs 11 6.0 0.000 0.000 1.941 2.074 dbcsr_matrix_vector_mult 652 9.0 0.014 0.049 1.987 2.038 matrix_decluster 11 7.0 0.000 0.000 1.799 1.928 make_images_pack 370 9.1 1.709 1.910 1.713 1.914 matrix_sqrt_Newton_Schulz 1 6.0 0.000 0.001 1.895 1.896 multiply_cannon_metrocomm1 1480 9.1 0.007 0.008 1.115 1.856 dbcsr_matrix_vector_mult_local 652 10.0 1.632 1.709 1.634 1.710 buffer_matrices_ensure_size 370 8.1 1.450 1.568 1.450 1.568 dbcsr_finalize 646 7.5 0.008 0.008 1.332 1.432 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=83.971, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=84.745, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=46.247, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=34.641, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=23.921, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images", label="make_images", y=12.886, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=8.916, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=11.552999999999997, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=1.632, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.203, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=29.737, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images", label="make_images", y=0.636, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.139, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=1.709, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=2.799, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=16.917, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.007 72.424 72.424 lib_test 1 2.0 0.000 0.000 72.415 72.415 dbcsr_run_tests 3 3.0 0.002 0.002 72.415 72.415 test_multiplies_multiproc 3 4.0 0.001 0.001 56.212 56.212 dbcsr_redistribute 9 5.0 36.429 36.429 38.063 38.063 dbcsr_multiply_generic 9 5.0 0.001 0.001 16.801 16.801 dbcsr_make_random_matrix 9 4.0 12.827 12.827 16.090 16.090 multiply_cannon 9 6.0 0.002 0.002 12.020 12.020 multiply_cannon_loop 9 7.0 0.009 0.009 11.664 11.664 multiply_cannon_multrec 9 8.0 11.654 11.654 11.655 11.655 dbcsr_finalize 27 5.7 0.015 0.015 5.832 5.832 dbcsr_merge_all 18 6.5 1.996 1.996 5.070 5.070 dbcsr_data_release 975 7.6 2.859 2.859 2.859 2.859 tree_to_linear_d 9 7.0 1.946 1.946 1.946 1.946 make_m2s 18 6.0 0.001 0.001 1.644 1.644 make_images 18 7.0 0.562 0.562 1.582 1.582 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.004 0.011 19.572 19.576 lib_test 1 2.0 0.000 0.000 19.532 19.554 dbcsr_run_tests 3 3.0 0.000 0.001 19.531 19.553 test_multiplies_multiproc 3 4.0 0.000 0.002 18.682 18.737 dbcsr_multiply_generic 9 5.0 0.001 0.001 17.128 17.230 multiply_cannon 9 6.0 0.002 0.002 15.073 15.600 multiply_cannon_loop 9 7.0 0.002 0.002 14.774 15.333 multiply_cannon_multrec 72 8.0 11.391 11.800 11.392 11.801 mp_waitall_1 576 9.2 3.712 4.558 3.712 4.558 multiply_cannon_metrocomm1 72 8.0 0.001 0.001 3.314 4.150 mp_sum_l 470 2.5 0.729 1.264 0.729 1.264 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.718 1.253 dbcsr_data_release 444 7.6 0.772 0.901 0.772 0.901 dbcsr_make_random_matrix 9 4.0 0.656 0.664 0.811 0.836 dbcsr_destroy 111 5.9 0.001 0.033 0.659 0.782 make_m2s 18 6.0 0.001 0.001 0.722 0.757 make_images 18 7.0 0.021 0.022 0.718 0.753 dbcsr_finalize 27 5.7 0.000 0.000 0.622 0.705 dbcsr_redistribute 9 5.0 0.252 0.284 0.566 0.590 dbcsr_merge_all 18 6.5 0.095 0.118 0.507 0.570 make_images_data 18 8.0 0.001 0.001 0.375 0.431 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=6.659000000000006, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=36.429, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=12.827, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.654, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.859, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=1.996, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=1.9649999999999999, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.252, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.656, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=11.391, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.772, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.095, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=3.712, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.729, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.053 0.053 137.294 137.294 qs_mol_dyn_low 1 2.0 0.003 0.003 135.850 135.850 velocity_verlet 5 3.0 0.003 0.003 110.912 110.912 qmmm_el_coupling 6 3.8 0.000 0.000 91.528 91.528 qmmm_elec_with_gaussian 6 4.8 0.012 0.012 91.518 91.518 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 90.881 90.881 qmmm_elec_gaussian_low_G 6 6.8 89.983 89.983 89.983 89.983 qs_forces 6 3.8 0.001 0.001 35.943 35.943 qs_energies 6 4.8 0.000 0.000 31.963 31.963 scf_env_do_scf 6 5.8 0.001 0.001 29.554 29.554 scf_env_do_scf_inner_loop 39 6.8 0.004 0.004 25.846 25.846 rebuild_ks_matrix 45 8.4 0.000 0.000 24.829 24.829 qs_ks_build_kohn_sham_matrix 45 9.4 0.005 0.005 24.829 24.829 qs_ks_update_qs_env 45 7.8 0.000 0.000 21.277 21.277 pw_transfer 966 12.3 0.057 0.057 17.694 17.694 fft_wrap_pw1pw2 801 13.6 0.006 0.006 17.482 17.482 fft_wrap_pw1pw2_150 507 15.2 2.311 2.311 16.938 16.938 qs_vxc_create 45 10.4 0.001 0.001 13.560 13.560 xc_vxc_pw_create 45 11.4 0.663 0.663 13.559 13.559 xc_pw_derive 270 13.4 0.002 0.002 9.558 9.558 fft3d_s 802 15.6 8.075 8.075 8.083 8.083 qs_rho_update_rho_low 45 7.9 0.000 0.000 7.392 7.392 calculate_rho_elec 45 8.9 0.564 0.564 7.392 7.392 xc_rho_set_and_dset_create 45 12.4 0.625 0.625 7.050 7.050 xc_pw_divergence 45 12.4 0.001 0.001 5.794 5.794 pw_scatter_s 429 15.8 5.775 5.775 5.775 5.775 qmmm_forces 6 3.8 0.001 0.001 5.308 5.308 qmmm_forces_with_gaussian 6 4.8 0.018 0.018 4.962 4.962 pw_integral_ab 2539 7.4 4.530 4.530 4.530 4.530 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 4.236 4.236 qs_ks_ddapc 45 10.4 0.001 0.001 4.225 4.225 sum_up_and_integrate 45 10.4 0.509 0.509 3.902 3.902 init_scf_loop 6 6.8 0.000 0.000 3.703 3.703 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 3.560 3.560 qmmm_forces_gaussian_low_G 6 6.8 3.519 3.519 3.519 3.519 grid_collocate_task_list 45 9.9 3.436 3.436 3.436 3.436 integrate_v_rspace 45 11.4 0.020 0.020 3.393 3.393 density_rs2pw 45 9.9 0.002 0.002 3.392 3.392 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.038 0.054 60.286 60.296 qs_mol_dyn_low 1 2.0 0.003 0.004 59.123 59.184 qs_forces 6 3.8 0.001 0.001 44.216 44.216 qs_energies 6 4.8 0.000 0.001 42.206 42.207 scf_env_do_scf 6 5.8 0.000 0.001 41.071 41.071 scf_env_do_scf_inner_loop 113 6.2 0.002 0.016 39.376 39.378 rebuild_ks_matrix 119 8.1 0.000 0.000 29.155 29.164 qs_ks_build_kohn_sham_matrix 119 9.1 0.016 0.019 29.155 29.163 qs_ks_update_qs_env 119 7.3 0.001 0.001 27.446 27.454 velocity_verlet 5 3.0 0.002 0.004 24.357 24.360 pw_transfer 2446 12.3 0.207 0.228 21.036 21.598 fft_wrap_pw1pw2 2059 13.4 0.025 0.029 20.571 21.143 fft_wrap_pw1pw2_150 1321 14.9 3.536 3.849 19.969 20.492 qs_vxc_create 119 10.1 0.002 0.002 15.889 15.895 xc_vxc_pw_create 119 11.1 0.155 0.209 15.887 15.893 fft3d_ps 2059 15.4 7.959 8.625 13.308 14.380 xc_pw_derive 714 13.1 0.011 0.013 12.220 12.486 qs_rho_update_rho_low 119 7.3 0.001 0.001 11.315 11.316 calculate_rho_elec 119 8.3 0.050 0.057 11.315 11.316 sum_up_and_integrate 119 10.1 0.078 0.087 9.641 9.652 integrate_v_rspace 119 11.1 0.003 0.004 9.564 9.580 xc_pw_divergence 119 12.1 0.005 0.006 8.075 8.298 qmmm_forces 6 3.8 0.002 0.002 7.581 7.581 xc_rho_set_and_dset_create 119 12.1 0.338 0.407 7.425 7.572 qmmm_forces_with_gaussian 6 4.8 0.007 0.009 7.296 7.456 density_rs2pw 119 9.3 0.006 0.008 6.737 7.021 rs_pw_transfer 988 11.5 0.013 0.015 6.654 6.921 qmmm_el_coupling 6 3.8 0.000 0.000 6.471 6.565 qmmm_elec_with_gaussian 6 4.8 0.003 0.004 6.470 6.563 potential_pw2rs 119 12.1 0.006 0.007 5.797 5.809 grid_collocate_task_list 119 9.3 4.399 4.673 4.399 4.673 mp_alltoall_z22v 2059 17.4 3.892 4.478 3.892 4.478 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 3.966 4.041 grid_integrate_task_list 119 12.1 3.511 3.715 3.511 3.715 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.496 3.530 qmmm_forces_gaussian_low_G 6 6.8 3.261 3.334 3.261 3.334 mp_waitany 4028 12.8 2.625 3.180 2.625 3.180 x_to_yz 1095 16.8 0.801 0.874 2.855 3.121 qmmm_elec_gaussian_low_G 6 6.8 2.904 2.929 2.904 2.929 yz_to_x 964 16.0 0.587 0.726 2.425 2.865 pw_restrict_s3 18 5.8 1.438 1.479 2.771 2.863 qs_scf_new_mos 113 7.2 0.000 0.001 2.620 2.625 qs_scf_loop_do_ot 113 8.2 0.000 0.001 2.620 2.624 rs_pw_transfer_PW2RS_150 125 13.9 0.928 1.015 2.592 2.619 ot_scf_mini 113 9.2 0.001 0.001 2.529 2.534 pw_gather_p 964 15.0 2.077 2.374 2.077 2.374 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 2.244 2.334 pw_prolongate_s3 18 6.8 1.185 1.224 2.244 2.334 rs_pw_transfer_RS2PW_150 125 11.2 0.699 0.854 2.050 2.329 qs_ks_ddapc 119 10.1 0.003 0.003 2.243 2.307 dbcsr_multiply_generic 2588 12.3 0.059 0.063 1.895 1.917 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 1.727 1.728 init_scf_loop 6 6.8 0.000 0.000 1.691 1.691 pw_integral_ab 2761 7.7 1.344 1.412 1.585 1.687 pw_scatter_p 1095 15.8 1.578 1.607 1.578 1.607 ot_mini 113 10.2 0.000 0.000 1.236 1.241 mp_sum_dm3 33 5.7 1.178 1.216 1.178 1.216 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=19.664999999999992, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=89.983, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=8.075, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=5.775, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=4.53, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.519, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=3.436, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=2.311, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=29.48, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.904, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=1.344, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=3.261, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=4.399, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft_wrap_pw1pw2_150", label="fft_wrap_pw1pw2_150", y=3.536, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=3.511, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=7.959, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=3.892, yerr=0.0 Summary: Performance test took 34 minutes. Status: OK Removing intermediate container a567ee261e75 ---> acb8744e6cc1 Step 41/42 : CMD cat $(find ./report.log -mmin +10) | sed '/^Summary:/ s/$/ (cached)/' ---> Running in 0db2d8f6138b Removing intermediate container 0db2d8f6138b ---> 61f344b8f3c6 Step 42/42 : ENTRYPOINT [] ---> Running in 72b87505bb94 Removing intermediate container 72b87505bb94 ---> 818fa69ab30e [Warning] One or more build-args [GIT_COMMIT_SHA] were not consumed Successfully built 818fa69ab30e Successfully tagged us-central1-docker.pkg.dev/cp2k-org-project/cp2kci/img_cp2k-perf-openmp:master Pushing new image... done. #################### Running Image cp2k-perf-openmp #################### Uploading artifacts... done EndDate: 2023-06-26 13:36:51+00:00