StartDate: 2021-10-08 20:22:46+00:00 CpuId: 64x Intel Xeon W 2000 / D-2100 (Skylake / Cascade Lake) {Skylake}, 14nm CommitSHA: 15378df3ae939e32cfbc10f886fdc118e4c5a51b CommitTime: 2021-10-08 13:27:31 +0200 CommitAuthor: Frederick Stein CommitSubject: Further simplifications of the Code, Remove one call to 2nd order routines Trying to pull image cp2k-toolchain-mpich... success :-) Trying to pull image cp2k-perf-openmp... success :-) #################### Running Image cp2k-perf-openmp #################### ========== Fetching Git Commit ========== CommitSHA: 15378df3ae939e32cfbc10f886fdc118e4c5a51b CommitTime: 2021-10-08 13:27:31 +0200 CommitAuthor: Frederick Stein CommitSubject: Further simplifications of the Code, Remove one call to 2nd order routines ========== Running Test ========== ========== Compiling CP2K ========== Compiling cp2k... done. ========== Running Performance Test ========== Running H2O-64.inp with 1 threads and 32 ranks... done. Running H2O-64.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.040 0.040 171.243 171.243 qs_mol_dyn_low 1 2.0 0.004 0.004 170.391 170.391 qs_forces 11 3.9 0.002 0.002 170.330 170.330 qs_energies 11 4.9 0.001 0.001 158.078 158.078 scf_env_do_scf 11 5.9 0.001 0.001 122.481 122.481 velocity_verlet 10 3.0 0.002 0.002 116.692 116.692 scf_env_do_scf_inner_loop 108 6.5 0.010 0.010 95.028 95.028 rebuild_ks_matrix 119 8.3 0.001 0.001 43.348 43.348 qs_ks_build_kohn_sham_matrix 119 9.3 0.020 0.020 43.347 43.347 qs_rho_update_rho 119 7.7 0.001 0.001 39.206 39.206 calculate_rho_elec 119 8.7 1.579 1.579 39.205 39.205 qs_ks_update_qs_env 119 7.6 0.001 0.001 38.658 38.658 grid_collocate_task_list 119 9.7 32.807 32.807 32.807 32.807 sum_up_and_integrate 119 10.3 0.398 0.398 30.873 30.873 integrate_v_rspace 119 11.3 0.167 0.167 30.475 30.475 grid_integrate_task_list 119 12.3 27.688 27.688 27.688 27.688 init_scf_loop 11 6.9 0.000 0.000 27.246 27.246 qs_scf_new_mos 108 7.5 0.001 0.001 24.990 24.990 qs_scf_loop_do_ot 108 8.5 0.001 0.001 24.989 24.989 dbcsr_multiply_generic 2286 12.5 0.188 0.188 23.804 23.804 ot_scf_mini 108 9.5 0.004 0.004 23.479 23.479 prepare_preconditioner 11 7.9 0.000 0.000 22.428 22.428 make_preconditioner 11 8.9 0.000 0.000 22.428 22.428 make_full_inverse_cholesky 11 9.9 0.000 0.000 20.304 20.304 init_scf_run 11 5.9 0.001 0.001 18.549 18.549 scf_env_initial_rho_setup 11 6.9 0.001 0.001 18.548 18.548 wfi_extrapolate 11 7.9 0.001 0.001 17.623 17.623 cp_gemm 81 9.0 0.000 0.000 16.728 16.728 cp_gemm_cosma 81 10.0 16.727 16.727 16.727 16.727 ot_mini 108 10.5 0.001 0.001 15.177 15.177 make_m2s 4572 13.5 0.068 0.068 14.267 14.267 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 11.564 11.564 pw_transfer 1439 11.6 0.103 0.103 8.167 8.167 cp_fm_cholesky_decompose 22 10.9 7.998 7.998 7.998 7.998 fft_wrap_pw1pw2 1201 12.6 0.011 0.011 7.823 7.823 ot_diis_step 108 11.5 0.006 0.006 7.696 7.696 make_images 4572 14.5 2.765 2.765 7.667 7.667 qs_ot_get_derivative 108 11.5 0.002 0.002 7.477 7.477 dbcsr_make_dense_low 5837 15.5 0.114 0.114 6.809 6.809 make_dense_data 5837 16.5 5.950 5.950 6.672 6.672 dbcsr_complete_redistribute 329 12.2 3.165 3.165 6.653 6.653 fft_wrap_pw1pw2_140 487 13.2 0.692 0.692 6.618 6.618 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.558 6.558 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 6.543 6.543 apply_single 119 13.6 0.001 0.001 6.543 6.543 qs_env_update_s_mstruct 11 6.9 0.000 0.000 6.456 6.456 dbcsr_copy 2102 12.0 0.310 0.310 6.229 6.229 dbcsr_make_images_dense 3978 14.8 0.028 0.028 6.111 6.111 qs_create_task_list 11 7.9 0.000 0.000 5.864 5.864 generate_qs_task_list 11 8.9 4.088 4.088 5.864 5.864 dbcsr_copy_into_existing 22 7.9 5.851 5.851 5.852 5.852 cp_fm_cholesky_invert 11 10.9 5.776 5.776 5.776 5.776 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.691 5.691 copy_dbcsr_to_fm 153 11.3 0.004 0.004 5.406 5.406 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 5.303 5.303 calculate_w_matrix_ot 11 6.9 0.009 0.009 5.302 5.302 multiply_cannon 2286 13.5 0.325 0.325 5.005 5.005 pw_poisson_solve 119 10.3 2.086 2.086 4.872 4.872 density_rs2pw 119 9.7 0.007 0.007 4.819 4.819 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.488 4.488 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.438 4.438 qs_ot_get_p 119 10.4 0.001 0.001 4.252 4.252 multiply_cannon_loop 2286 14.5 0.068 0.068 4.185 4.185 multiply_cannon_multrec 2286 15.5 4.038 4.038 4.115 4.115 fft3d_s 1202 14.6 3.556 3.556 3.563 3.563 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.009 0.014 82.682 82.683 qs_mol_dyn_low 1 2.0 0.005 0.006 82.549 82.555 qs_forces 11 3.9 0.002 0.002 82.494 82.494 qs_energies 11 4.9 0.001 0.002 77.101 77.105 scf_env_do_scf 11 5.9 0.001 0.001 69.819 69.821 scf_env_do_scf_inner_loop 108 6.5 0.004 0.011 64.802 64.809 velocity_verlet 10 3.0 0.002 0.002 48.636 48.638 rebuild_ks_matrix 119 8.3 0.001 0.001 31.636 31.691 qs_ks_build_kohn_sham_matrix 119 9.3 0.024 0.026 31.635 31.690 qs_ks_update_qs_env 119 7.6 0.001 0.002 28.101 28.153 qs_rho_update_rho 119 7.7 0.001 0.001 24.632 24.658 calculate_rho_elec 119 8.7 0.049 0.053 24.631 24.657 sum_up_and_integrate 119 10.3 0.059 0.063 24.457 24.489 integrate_v_rspace 119 11.3 0.005 0.006 24.398 24.429 dbcsr_multiply_generic 2286 12.5 0.146 0.148 20.710 20.773 grid_collocate_task_list 119 9.7 17.049 17.678 17.049 17.678 grid_integrate_task_list 119 12.3 17.044 17.537 17.044 17.537 qs_scf_new_mos 108 7.5 0.001 0.002 17.107 17.159 qs_scf_loop_do_ot 108 8.5 0.001 0.001 17.106 17.158 ot_scf_mini 108 9.5 0.004 0.004 16.043 16.105 multiply_cannon 2286 13.5 0.243 0.247 13.838 14.045 multiply_cannon_loop 2286 14.5 0.255 0.265 12.479 12.920 mp_waitall_1 169478 16.3 10.214 10.538 10.214 10.538 ot_mini 108 10.5 0.001 0.001 9.571 9.634 rs_pw_transfer 974 11.9 0.018 0.019 7.916 8.787 density_rs2pw 119 9.7 0.009 0.010 6.873 7.753 pw_transfer 1439 11.6 0.160 0.167 7.009 7.067 multiply_cannon_metrocomm3 18288 15.5 0.089 0.093 6.529 6.981 fft_wrap_pw1pw2 1201 12.6 0.015 0.016 6.654 6.722 potential_pw2rs 119 12.3 0.011 0.012 6.095 6.117 fft_wrap_pw1pw2_140 487 13.2 0.630 0.655 5.722 5.900 fft3d_ps 1201 14.6 2.716 2.866 5.017 5.097 init_scf_run 11 5.9 0.000 0.002 5.045 5.045 scf_env_initial_rho_setup 11 6.9 0.000 0.001 5.045 5.045 init_scf_loop 11 6.9 0.001 0.001 4.987 4.988 ot_diis_step 108 11.5 0.005 0.006 4.795 4.795 qs_ot_get_derivative 108 11.5 0.002 0.002 4.722 4.787 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 4.689 4.726 apply_single 119 13.6 0.001 0.001 4.688 4.725 multiply_cannon_multrec 18288 15.5 4.519 4.660 4.539 4.679 make_m2s 4572 13.5 0.081 0.084 4.564 4.636 wfi_extrapolate 11 7.9 0.001 0.001 4.600 4.600 make_images 4572 14.5 0.197 0.201 3.803 3.871 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.781 3.791 mp_waitany 9880 13.7 2.787 3.657 2.787 3.657 rs_pw_transfer_RS2PW_140 130 11.5 0.571 0.603 2.454 3.338 rs_pw_transfer_PW2RS_140 130 13.9 1.408 1.475 2.956 2.980 qs_ot_get_p 119 10.4 0.001 0.002 2.226 2.320 mp_alltoall_d11v 2130 13.8 1.609 2.231 1.609 2.231 rs_gather_matrices 119 12.3 0.147 0.159 1.197 1.872 make_images_data 4572 15.5 0.064 0.071 1.720 1.825 mp_alltoall_z22v 1201 16.6 1.548 1.756 1.548 1.756 prepare_preconditioner 11 7.9 0.000 0.000 1.734 1.749 make_preconditioner 11 8.9 0.000 0.000 1.734 1.749 cp_gemm 81 9.0 0.000 0.000 1.706 1.710 cp_gemm_cosma 81 10.0 1.705 1.710 1.705 1.710 ------------------------------------------------------------------------------- Plot: name="H2O-64_timings_32omp", title="Timings of H2O-64 with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32omp", name="rest", label="rest", y=76.03499999999998, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=32.807, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=27.688, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=16.727, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=7.998, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="make_dense_data", label="make_dense_data", y=5.95, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.038, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 Plot: name="H2O-64_timings_32mpi", title="Timings of H2O-64 with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_timings_32mpi", name="rest", label="rest", y=29.364000000000004, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=17.049, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=17.044, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=1.705, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="make_dense_data", label="make_dense_data", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=4.519, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=10.214, yerr=0.0 PlotPoint: plot="H2O-64_timings_32mpi", name="mp_waitany", label="mp_waitany", y=2.787, yerr=0.0 Running H2O-64_nonortho.inp with 1 threads and 32 ranks... done. Running H2O-64_nonortho.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-64_nonortho_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.040 0.040 210.085 210.085 qs_mol_dyn_low 1 2.0 0.004 0.004 209.237 209.237 qs_forces 11 3.9 0.002 0.002 209.176 209.176 qs_energies 11 4.9 0.001 0.001 195.017 195.017 scf_env_do_scf 11 5.9 0.001 0.001 155.747 155.747 velocity_verlet 10 3.0 0.002 0.002 140.299 140.299 scf_env_do_scf_inner_loop 96 6.5 0.010 0.010 125.456 125.456 rebuild_ks_matrix 107 8.3 0.001 0.001 64.430 64.430 qs_ks_build_kohn_sham_matrix 107 9.3 0.019 0.019 64.429 64.429 qs_rho_update_rho 107 7.7 0.001 0.001 58.946 58.946 calculate_rho_elec 107 8.7 1.415 1.415 58.945 58.945 qs_ks_update_qs_env 107 7.6 0.001 0.001 57.746 57.746 grid_collocate_task_list 107 9.7 53.190 53.190 53.190 53.190 sum_up_and_integrate 107 10.3 0.364 0.364 53.098 53.098 integrate_v_rspace 107 11.3 0.148 0.148 52.733 52.733 grid_integrate_task_list 107 12.3 50.180 50.180 50.180 50.180 init_scf_loop 11 6.9 0.000 0.000 30.074 30.074 prepare_preconditioner 11 7.9 0.000 0.000 22.535 22.535 make_preconditioner 11 8.9 0.000 0.000 22.535 22.535 qs_scf_new_mos 96 7.5 0.001 0.001 21.697 21.697 qs_scf_loop_do_ot 96 8.5 0.001 0.001 21.696 21.696 init_scf_run 11 5.9 0.001 0.001 20.981 20.981 scf_env_initial_rho_setup 11 6.9 0.001 0.001 20.979 20.979 dbcsr_multiply_generic 1966 12.4 0.167 0.167 20.808 20.808 make_full_inverse_cholesky 11 9.9 0.000 0.000 20.451 20.451 ot_scf_mini 96 9.5 0.003 0.003 20.356 20.356 wfi_extrapolate 11 7.9 0.001 0.001 19.814 19.814 cp_gemm 81 9.0 0.000 0.000 16.721 16.721 cp_gemm_cosma 81 10.0 16.721 16.721 16.721 16.721 ot_mini 96 10.5 0.001 0.001 13.126 13.126 qs_energies_init_hamiltonians 11 5.9 0.000 0.000 12.789 12.789 make_m2s 3932 13.4 0.060 0.060 12.424 12.424 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 8.389 8.389 cp_fm_cholesky_decompose 22 10.9 8.055 8.055 8.055 8.055 qs_env_update_s_mstruct 11 6.9 0.000 0.000 7.653 7.653 pw_transfer 1295 11.6 0.094 0.094 7.439 7.439 fft_wrap_pw1pw2 1081 12.6 0.011 0.011 7.130 7.130 qs_create_task_list 11 7.9 0.000 0.000 7.083 7.083 generate_qs_task_list 11 8.9 5.296 5.296 7.083 7.083 dbcsr_complete_redistribute 317 12.2 3.121 3.121 6.770 6.770 make_images 3932 14.4 2.434 2.434 6.726 6.726 ot_diis_step 96 11.5 0.005 0.005 6.595 6.595 qs_ot_get_derivative 96 11.5 0.001 0.001 6.527 6.527 dbcsr_copy 1855 11.9 0.283 0.283 6.255 6.255 fft_wrap_pw1pw2_140 439 13.2 0.602 0.602 6.049 6.049 dbcsr_copy_into_existing 22 7.9 5.918 5.918 5.918 5.918 dbcsr_make_dense_low 4961 15.5 0.100 0.100 5.889 5.889 make_dense_data 4961 16.5 5.170 5.170 5.769 5.769 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 5.766 5.766 cp_fm_cholesky_invert 11 10.9 5.677 5.677 5.677 5.677 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 5.655 5.655 apply_single 107 13.6 0.000 0.000 5.654 5.654 copy_dbcsr_to_fm 147 11.2 0.004 0.004 5.597 5.597 qs_energies_compute_matrix_w 11 5.9 0.000 0.000 5.316 5.316 calculate_w_matrix_ot 11 6.9 0.008 0.008 5.316 5.316 dbcsr_make_images_dense 3386 14.7 0.024 0.024 5.259 5.259 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 4.639 4.639 pw_poisson_solve 107 10.3 1.956 1.956 4.487 4.487 multiply_cannon 1966 13.4 0.280 0.280 4.453 4.453 build_core_hamiltonian_matrix 11 6.9 0.001 0.001 4.452 4.452 density_rs2pw 107 9.7 0.006 0.006 4.341 4.341 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-64_nonortho_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.012 136.453 136.454 qs_mol_dyn_low 1 2.0 0.005 0.006 136.330 136.336 qs_forces 11 3.9 0.002 0.002 136.275 136.276 qs_energies 11 4.9 0.001 0.002 127.122 127.124 scf_env_do_scf 11 5.9 0.001 0.001 116.927 116.929 scf_env_do_scf_inner_loop 96 6.5 0.003 0.010 108.647 108.647 velocity_verlet 10 3.0 0.002 0.002 81.270 81.272 rebuild_ks_matrix 107 8.3 0.001 0.001 61.441 61.480 qs_ks_build_kohn_sham_matrix 107 9.3 0.021 0.022 61.441 61.479 sum_up_and_integrate 107 10.3 0.050 0.054 55.216 55.265 integrate_v_rspace 107 11.3 0.004 0.005 55.165 55.217 qs_ks_update_qs_env 107 7.6 0.001 0.001 54.092 54.125 qs_rho_update_rho 107 7.7 0.001 0.001 52.031 52.051 calculate_rho_elec 107 8.7 0.043 0.046 52.030 52.050 grid_integrate_task_list 107 12.3 47.746 49.201 47.746 49.201 grid_collocate_task_list 107 9.7 44.746 46.046 44.746 46.046 dbcsr_multiply_generic 1966 12.4 0.122 0.123 17.415 17.735 qs_scf_new_mos 96 7.5 0.001 0.001 14.021 14.064 qs_scf_loop_do_ot 96 8.5 0.001 0.001 14.020 14.063 ot_scf_mini 96 9.5 0.003 0.003 13.160 13.198 multiply_cannon 1966 13.4 0.205 0.210 11.737 11.914 multiply_cannon_loop 1966 14.4 0.211 0.216 10.642 10.898 mp_waitall_1 146670 16.2 8.645 8.997 8.645 8.997 rs_pw_transfer 878 11.9 0.016 0.018 7.840 8.854 init_scf_loop 11 6.9 0.001 0.001 8.261 8.261 init_scf_run 11 5.9 0.000 0.002 8.034 8.034 scf_env_initial_rho_setup 11 6.9 0.000 0.001 8.034 8.034 ot_mini 96 10.5 0.001 0.001 7.827 7.861 density_rs2pw 107 9.7 0.008 0.009 6.666 7.714 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 7.568 7.574 wfi_extrapolate 11 7.9 0.001 0.001 7.373 7.373 pw_transfer 1295 11.6 0.140 0.147 5.966 6.037 multiply_cannon_metrocomm3 15728 15.4 0.075 0.077 5.526 5.745 fft_wrap_pw1pw2 1081 12.6 0.013 0.015 5.660 5.723 potential_pw2rs 107 12.3 0.009 0.010 5.409 5.424 fft_wrap_pw1pw2_140 439 13.2 0.548 0.565 4.905 5.022 mp_waitany 8968 13.7 3.330 4.357 3.330 4.357 fft3d_ps 1081 14.6 2.361 2.472 4.240 4.320 multiply_cannon_multrec 15728 15.4 3.903 4.026 3.920 4.043 apply_preconditioner_dbcsr 107 12.6 0.000 0.000 3.999 4.031 apply_single 107 13.6 0.001 0.001 3.998 4.031 ot_diis_step 96 11.5 0.004 0.005 3.999 3.999 rs_pw_transfer_RS2PW_140 118 11.5 0.444 0.472 2.935 3.969 make_m2s 3932 13.4 0.068 0.071 3.866 3.925 qs_ot_get_derivative 96 11.5 0.001 0.002 3.789 3.821 mp_alltoall_d11v 1998 13.7 2.324 3.604 2.324 3.604 make_images 3932 14.4 0.169 0.175 3.222 3.280 rs_gather_matrices 107 12.3 0.131 0.143 1.950 3.187 rs_pw_transfer_PW2RS_140 118 13.9 1.343 1.405 2.794 2.827 ------------------------------------------------------------------------------- Plot: name="H2O-64_nonortho_timings_32omp", title="Timings of H2O-64_nonortho with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="rest", label="rest", y=76.02099999999999, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=53.19, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=50.18, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=16.721, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=8.055, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=5.918, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitany", label="mp_waitany", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 Plot: name="H2O-64_nonortho_timings_32mpi", title="Timings of H2O-64_nonortho with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="rest", label="rest", y=28.083, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=44.746, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=47.746, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="cp_fm_cholesky_decompose", label="cp_fm_cholesky_decompose", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=3.903, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitany", label="mp_waitany", y=3.33, yerr=0.0 PlotPoint: plot="H2O-64_nonortho_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=8.645, yerr=0.0 Running H2O-hyb.inp with 1 threads and 32 ranks... done. Running H2O-hyb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/H2O-hyb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.371 0.371 276.454 276.454 qs_energies 1 2.0 0.000 0.000 275.167 275.167 scf_env_do_scf 1 3.0 0.000 0.000 272.303 272.303 qs_ks_update_qs_env 8 5.0 0.000 0.000 263.107 263.107 rebuild_ks_matrix 7 6.0 0.000 0.000 262.996 262.996 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.002 262.996 262.996 hfx_ks_matrix 7 8.0 0.000 0.000 169.910 169.910 integrate_four_center 7 9.0 2.277 2.277 169.878 169.878 scf_env_do_scf_inner_loop 7 4.0 0.001 0.001 167.956 167.956 integrate_four_center_main 7 10.0 1.119 1.119 158.276 158.276 integrate_four_center_bin 450 11.0 157.157 157.157 157.157 157.157 init_scf_loop 1 4.0 0.000 0.000 104.333 104.333 cp_gemm 129 10.3 0.001 0.001 77.383 77.383 cp_gemm_cosma 129 11.3 77.383 77.383 77.383 77.383 admm_mo_calc_rho_aux 7 8.0 0.000 0.000 42.194 42.194 admm_mo_merge_derivs 7 8.0 0.000 0.000 39.719 39.719 merge_mo_derivs_diag 7 9.0 0.023 0.023 39.719 39.719 admm_fit_mo_coeffs 7 9.0 0.000 0.000 39.412 39.412 purify_mo_diag 7 10.0 0.001 0.001 22.878 22.878 fit_mo_coeffs 7 10.0 0.000 0.000 16.534 16.534 integrate_four_center_load 7 10.0 0.001 0.001 8.921 8.921 hfx_load_balance 1 11.0 0.002 0.002 8.920 8.920 calculate_rho_elec 15 7.4 0.195 0.195 6.367 6.367 grid_collocate_task_list 15 8.4 5.566 5.566 5.566 5.566 ------------------------------------------------------------------------------- From /workspace/artifacts/H2O-hyb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.233 0.238 190.161 190.162 qs_energies 1 2.0 0.000 0.001 189.777 189.778 scf_env_do_scf 1 3.0 0.000 0.000 189.152 189.152 qs_ks_update_qs_env 8 5.0 0.000 0.000 185.795 185.796 rebuild_ks_matrix 7 6.0 0.000 0.000 185.781 185.781 qs_ks_build_kohn_sham_matrix 7 7.0 0.002 0.003 185.781 185.781 hfx_ks_matrix 7 8.0 0.000 0.001 172.876 172.877 integrate_four_center 7 9.0 0.105 0.423 172.859 172.859 integrate_four_center_main 7 10.0 0.005 0.005 158.680 162.462 integrate_four_center_bin 448 11.0 158.675 162.457 158.675 162.457 scf_env_do_scf_inner_loop 7 4.0 0.000 0.001 111.700 111.700 init_scf_loop 1 4.0 0.000 0.000 77.450 77.450 integrate_four_center_load 7 10.0 0.000 0.000 8.941 8.944 hfx_load_balance 1 11.0 0.001 0.002 8.941 8.944 mp_sync 70 11.3 4.358 7.489 4.358 7.489 cp_gemm 129 10.3 0.001 0.001 5.077 5.083 cp_gemm_cosma 129 11.3 5.076 5.082 5.076 5.082 hfx_load_balance_bin 1 12.0 4.349 4.480 4.349 4.480 hfx_load_balance_count 1 12.0 4.332 4.448 4.332 4.448 qs_vxc_create 14 8.0 0.000 0.001 3.867 3.867 xc_vxc_pw_create 14 9.0 0.021 0.023 3.867 3.867 ------------------------------------------------------------------------------- Plot: name="H2O-hyb_timings_32omp", title="Timings of H2O-hyb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32omp", name="rest", label="rest", y=32.952, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_bin", label="integrate_four_center_bin", y=157.157, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=77.383, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.566, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center", label="integrate_four_center", y=2.277, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="integrate_four_center_main", label="integrate_four_center_main", y=1.119, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_count", label="hfx_load_balance_count", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32omp", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=0.0, yerr=0.0 Plot: name="H2O-hyb_timings_32mpi", title="Timings of H2O-hyb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="H2O-hyb_timings_32mpi", name="rest", label="rest", y=13.261000000000024, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_bin", label="integrate_four_center_bin", y=158.675, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=5.076, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center", label="integrate_four_center", y=0.105, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="integrate_four_center_main", label="integrate_four_center_main", y=0.005, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_count", label="hfx_load_balance_count", y=4.332, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="mp_sync", label="mp_sync", y=4.358, yerr=0.0 PlotPoint: plot="H2O-hyb_timings_32mpi", name="hfx_load_balance_bin", label="hfx_load_balance_bin", y=4.349, yerr=0.0 Running GW_PBE_4benzene.inp with 1 threads and 32 ranks... done. Running GW_PBE_4benzene.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/GW_PBE_4benzene_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.015 431.466 431.466 qs_energies 1 2.0 0.000 0.000 430.927 430.927 mp2_main 1 3.0 0.000 0.000 426.048 426.048 mp2_gpw_main 1 4.0 0.000 0.000 425.862 425.862 rpa_ri_compute_en 1 5.0 0.000 0.000 402.175 402.175 rpa_num_int 1 6.0 0.000 0.000 402.148 402.148 compute_mat_P_omega 1 7.0 0.002 0.002 205.134 205.134 compute_mat_P_omega_contract 10 8.0 14.026 14.026 203.517 203.517 dbcsr_t_total 2336 9.6 0.018 0.018 191.602 191.602 cp_gemm 105 8.4 0.000 0.000 175.033 175.033 cp_gemm_cosma 105 9.4 175.033 175.033 175.033 175.033 GW_matrix_operations 10 7.0 0.008 0.008 115.151 115.151 dbcsr_t_contract 787 11.0 51.099 51.099 114.360 114.360 compute_mat_P_omega_calc_M_occ 250 9.0 14.014 14.014 75.847 75.847 dbcsr_t_copy 1103 10.7 21.279 21.279 75.594 75.594 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 59.005 59.005 dbcsr_tas_total 1149 12.2 0.050 0.050 57.207 57.207 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 56.987 56.987 dbcsr_tas_multiply 807 12.1 0.003 0.003 55.584 55.584 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.001 46.611 46.611 dbcsr_multiply_generic 837 15.8 0.141 0.141 41.157 41.157 dbcsr_tas_dbcsr 807 14.1 0.003 0.003 40.875 40.875 dbcsr_tas_reserve_blocks_index 3261 13.7 7.340 7.340 29.069 29.069 dbcsr_tas_mm_1N 524 15.1 0.002 0.002 28.563 28.563 multiply_cannon 837 16.8 0.490 0.490 26.548 26.548 dbcsr_tas_copy 574 11.4 17.747 17.747 25.683 25.683 mp2_ri_gpw_compute_in 1 5.0 0.000 0.000 23.671 23.671 multiply_cannon_loop 837 17.8 0.147 0.147 22.983 22.983 dbcsr_t_reserve_blocks_index 2280 12.5 1.312 1.312 22.477 22.477 multiply_cannon_multrec 837 18.8 20.790 20.790 21.667 21.667 dbcsr_reserve_blocks 3717 14.7 20.985 20.985 21.389 21.389 dbcsr_t_reserve_blocks_index_a 2222 11.6 0.012 0.012 21.102 21.102 compute_QP_energies 1 7.0 0.000 0.000 20.769 20.769 compute_self_energy_cubic_gw 1 8.0 0.112 0.112 20.768 20.768 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 20.411 20.411 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 15.198 15.198 dbcsr_t_copy_nocomm 251 12.0 11.994 11.994 14.450 14.450 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 12.707 12.707 make_m2s 1674 16.8 0.108 0.108 11.862 11.862 make_images 1674 17.8 5.406 5.406 11.351 11.351 dbcsr_tas_mm_2 251 15.0 0.002 0.002 10.807 10.807 ------------------------------------------------------------------------------- From /workspace/artifacts/GW_PBE_4benzene_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.009 66.310 66.311 qs_energies 1 2.0 0.000 0.001 66.180 66.187 mp2_main 1 3.0 0.000 0.000 64.495 64.502 mp2_gpw_main 1 4.0 0.001 0.001 64.429 64.436 rpa_ri_compute_en 1 5.0 0.000 0.000 62.244 62.251 rpa_num_int 1 6.0 0.001 0.001 62.236 62.243 dbcsr_t_total 2336 9.6 0.019 0.020 48.725 48.730 compute_mat_P_omega 1 7.0 0.001 0.002 47.765 47.773 compute_mat_P_omega_contract 10 8.0 0.894 0.930 47.445 47.451 dbcsr_t_contract 787 11.0 2.105 2.244 35.983 35.990 dbcsr_tas_total 1149 12.2 0.076 0.081 31.762 31.764 dbcsr_tas_multiply 807 12.1 0.003 0.003 31.627 31.630 dbcsr_tas_dbcsr 807 14.1 0.004 0.005 23.056 23.058 dbcsr_multiply_generic 837 15.8 0.080 0.086 19.486 20.355 compute_mat_P_omega_calc_M_occ 250 9.0 0.868 0.902 15.804 15.805 multiply_cannon 837 16.8 0.156 0.176 11.482 11.940 compute_mat_P_omega_calc_P_t 250 9.0 0.001 0.001 11.889 11.889 dbcsr_t_copy 1111 10.7 4.913 5.168 10.954 11.259 dbcsr_tas_mm_1N 524 15.1 0.003 0.003 10.254 11.017 multiply_cannon_loop 837 17.8 0.052 0.056 10.440 10.866 compute_mat_P_omega_calc_M_vir 250 9.0 0.001 0.002 9.982 9.983 cp_gemm 105 8.4 0.000 0.000 9.119 9.135 cp_gemm_cosma 105 9.4 9.119 9.134 9.119 9.134 dbcsr_tas_mm_2 251 15.0 0.002 0.003 8.957 8.959 multiply_cannon_multrec 1386 17.8 8.033 8.386 8.334 8.652 mp_sync 8696 11.6 7.175 8.378 7.175 8.378 make_m2s 1674 16.8 0.052 0.056 6.877 7.409 make_images 1674 17.8 0.264 0.274 6.780 7.312 GW_matrix_operations 10 7.0 0.002 0.002 5.871 5.879 compute_QP_energies 1 7.0 0.000 0.001 4.891 4.891 compute_self_energy_cubic_gw 1 8.0 0.005 0.006 4.888 4.891 dbcsr_t_communicate_buffer 1098 11.7 0.105 0.115 4.058 4.235 mp_waitall_2 3776 14.7 3.810 4.107 3.810 4.107 make_images_data 1674 18.8 0.043 0.048 3.758 3.945 hybrid_alltoall_any 1724 19.5 2.900 3.295 3.600 3.801 contract_cubic_gw 21 9.0 0.000 0.000 3.686 3.686 dbcsr_t_reserve_blocks_index 2849 12.4 0.122 0.127 3.210 3.510 dbcsr_t_reserve_blocks_index_a 2791 11.4 0.024 0.027 3.162 3.461 dbcsr_tas_reserve_blocks_index 3300 13.8 0.287 0.307 3.149 3.442 rpa_num_int_RPA_matrix_operati 10 7.0 0.000 0.000 3.299 3.312 dbcsr_reserve_blocks 3785 14.7 2.857 3.133 2.901 3.178 contract_P_omega_with_mat_L 10 8.0 0.000 0.000 3.161 3.174 make_images_pack 1674 18.8 2.501 2.911 2.519 2.929 mp_waitall_1 26582 19.0 1.927 2.374 1.927 2.374 convert_to_new_pgrid 2421 14.1 0.022 0.024 2.183 2.318 dbcsr_copy 3323 15.8 2.101 2.240 2.134 2.272 mp2_ri_gpw_compute_in 1 5.0 0.001 0.001 2.183 2.183 compute_mat_P_omega_copy_M_vir 250 9.0 0.002 0.002 2.029 2.034 dbcsr_add_anytype 909 13.7 1.171 1.239 1.834 1.900 compute_mat_P_omega_copy_M_occ 250 9.0 0.002 0.002 1.792 1.796 scf_env_do_scf 1 3.0 0.000 0.000 1.625 1.625 scf_env_do_scf_inner_loop 17 4.0 0.001 0.001 1.624 1.624 dbcsr_tas_replicate 396 14.1 0.842 0.925 1.473 1.559 mp_max_i 2057 9.6 1.070 1.356 1.070 1.356 ------------------------------------------------------------------------------- Plot: name="GW_PBE_4benzene_timings_32omp", title="Timings of GW_PBE_4benzene with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="rest", label="rest", y=142.28000000000003, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="cp_gemm_cosma", label="cp_gemm_cosma", y=175.033, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_contract", label="dbcsr_t_contract", y=51.099, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_t_copy", label="dbcsr_t_copy", y=21.279, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=20.985, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=20.79, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_sync", label="mp_sync", y=0.0, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32omp", name="mp_waitall_2", label="mp_waitall_2", y=0.0, yerr=0.0 Plot: name="GW_PBE_4benzene_timings_32mpi", title="Timings of GW_PBE_4benzene with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="rest", label="rest", y=28.298000000000002, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="cp_gemm_cosma", label="cp_gemm_cosma", y=9.119, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_contract", label="dbcsr_t_contract", y=2.105, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_t_copy", label="dbcsr_t_copy", y=4.913, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="dbcsr_reserve_blocks", label="dbcsr_reserve_blocks", y=2.857, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=8.033, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_sync", label="mp_sync", y=7.175, yerr=0.0 PlotPoint: plot="GW_PBE_4benzene_timings_32mpi", name="mp_waitall_2", label="mp_waitall_2", y=3.81, yerr=0.0 Running diag_cu144_broy.inp with 1 threads and 32 ranks... done. Running diag_cu144_broy.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/diag_cu144_broy_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.130 0.130 190.441 190.441 qs_energies 1 2.0 0.000 0.000 188.575 188.575 scf_env_do_scf 1 3.0 0.000 0.000 177.807 177.807 scf_env_do_scf_inner_loop 15 4.0 0.002 0.002 177.807 177.807 qs_ks_update_qs_env 15 5.0 0.000 0.000 73.764 73.764 rebuild_ks_matrix 15 6.0 0.000 0.000 73.377 73.377 qs_ks_build_kohn_sham_matrix 15 7.0 0.003 0.003 73.376 73.376 qs_scf_new_mos 15 5.0 0.000 0.000 70.477 70.477 eigensolver 15 6.0 0.002 0.002 55.521 55.521 qs_vxc_create 15 8.0 0.040 0.040 48.329 48.329 calculate_dispersion_nonloc 15 9.0 9.426 9.426 42.415 42.415 cp_fm_diag_elpa 15 7.0 0.000 0.000 38.810 38.810 cp_fm_diag_elpa_base 15 8.0 33.619 33.619 38.810 38.810 pw_transfer 1191 9.8 0.103 0.103 29.232 29.232 fft_wrap_pw1pw2 1086 10.9 0.014 0.014 28.902 28.902 qs_rho_update_rho 16 5.0 0.000 0.000 26.311 26.311 calculate_rho_elec 16 6.0 0.351 0.351 26.310 26.310 grid_collocate_task_list 16 7.0 24.601 24.601 24.601 24.601 sum_up_and_integrate 15 8.0 0.079 0.079 23.355 23.355 integrate_v_rspace 15 9.0 0.034 0.034 23.276 23.276 grid_integrate_task_list 15 10.0 22.577 22.577 22.577 22.577 fft_wrap_pw1pw2_150 765 12.0 3.572 3.572 21.956 21.956 copy_dbcsr_to_fm 16 5.9 0.001 0.001 12.171 12.171 fft3d_s 1087 12.8 12.153 12.153 12.164 12.164 cp_fm_cholesky_restore 45 7.0 11.838 11.838 11.838 11.838 pw_scatter_s 585 13.0 11.247 11.247 11.247 11.247 dbcsr_complete_redistribute 46 8.3 4.026 4.026 10.888 10.888 cp_fm_upper_to_full 30 8.0 10.061 10.061 10.061 10.061 vdW_energy 15 10.0 8.695 8.695 8.695 8.695 gspace_mixing 14 5.0 0.280 0.280 8.611 8.611 broyden_mixing 14 6.0 7.853 7.853 7.853 7.853 fft_wrap_pw1pw2_200 197 11.5 0.381 0.381 6.672 6.672 xc_vxc_pw_create 15 9.0 1.314 1.314 5.875 5.875 init_scf_run 1 3.0 0.000 0.000 5.076 5.076 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 4.774 4.774 dbcsr_finalize 159 9.9 0.022 0.022 4.646 4.646 dbcsr_merge_all 91 11.1 0.087 0.087 4.484 4.484 mp_alltoall_d11v 186 9.2 3.972 3.972 3.972 3.972 ------------------------------------------------------------------------------- From /workspace/artifacts/diag_cu144_broy_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.019 0.022 95.486 95.487 qs_energies 1 2.0 0.000 0.001 95.061 95.061 scf_env_do_scf 1 3.0 0.000 0.000 89.459 89.459 scf_env_do_scf_inner_loop 15 4.0 0.001 0.003 89.459 89.459 qs_ks_update_qs_env 15 5.0 0.000 0.000 42.949 42.984 rebuild_ks_matrix 15 6.0 0.000 0.000 42.894 42.929 qs_ks_build_kohn_sham_matrix 15 7.0 0.004 0.006 42.894 42.929 qs_rho_update_rho 16 5.0 0.000 0.000 25.217 25.221 calculate_rho_elec 16 6.0 0.012 0.014 25.216 25.220 sum_up_and_integrate 15 8.0 0.015 0.018 24.368 24.410 integrate_v_rspace 15 9.0 0.001 0.001 24.353 24.395 grid_collocate_task_list 16 7.0 23.044 23.388 23.044 23.388 grid_integrate_task_list 15 10.0 22.410 22.994 22.410 22.994 qs_scf_new_mos 15 5.0 0.001 0.001 21.762 22.050 eigensolver 15 6.0 0.002 0.002 20.014 20.032 qs_vxc_create 15 8.0 0.001 0.002 17.899 17.912 calculate_dispersion_nonloc 15 9.0 1.428 1.466 14.564 14.594 pw_transfer 1191 9.8 0.141 0.153 14.294 14.422 cp_fm_diag_elpa 15 7.0 0.000 0.000 14.368 14.380 cp_fm_diag_elpa_base 15 8.0 14.078 14.127 14.360 14.364 fft_wrap_pw1pw2 1086 10.9 0.021 0.024 13.983 14.119 fft3d_ps 1086 12.9 6.087 6.487 10.703 10.918 fft_wrap_pw1pw2_150 765 12.0 0.712 0.758 9.449 9.497 cp_fm_cholesky_restore 45 7.0 5.371 5.443 5.371 5.443 fft_wrap_pw1pw2_200 197 11.5 0.390 0.412 4.343 4.468 qs_energies_init_hamiltonians 1 3.0 0.000 0.000 3.408 3.408 mp_alltoall_z22v 1086 14.9 2.914 3.404 2.914 3.404 xc_vxc_pw_create 15 9.0 0.068 0.092 3.334 3.357 build_core_hamiltonian_matrix 1 4.0 0.000 0.000 2.887 3.222 x_to_yz 585 14.0 1.077 1.126 2.580 2.743 rs_pw_transfer 158 9.4 0.003 0.003 2.072 2.653 yz_to_x 501 13.7 0.591 0.645 2.002 2.301 vdW_energy 15 10.0 2.165 2.271 2.165 2.271 density_rs2pw 16 7.0 0.002 0.002 1.970 2.269 build_core_ppnl 1 5.0 1.938 2.209 1.938 2.209 mp_waitany 520 11.3 1.368 2.041 1.368 2.041 ------------------------------------------------------------------------------- Plot: name="diag_cu144_broy_timings_32omp", title="Timings of diag_cu144_broy with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32omp", name="rest", label="rest", y=85.653, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=33.619, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=24.601, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.577, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_s", label="fft3d_s", y=12.153, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=11.838, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 Plot: name="diag_cu144_broy_timings_32mpi", title="Timings of diag_cu144_broy with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="rest", label="rest", y=24.49600000000001, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_diag_elpa_base", label="cp_fm_diag_elpa_base", y=14.078, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=23.044, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=22.41, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="cp_fm_cholesky_restore", label="cp_fm_cholesky_restore", y=5.371, yerr=0.0 PlotPoint: plot="diag_cu144_broy_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=6.087, yerr=0.0 Running bench_dftb.inp with 1 threads and 32 ranks... done. Running bench_dftb.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/bench_dftb_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.093 0.093 317.781 317.781 qs_energies 1 2.0 0.000 0.000 317.609 317.609 ls_scf 1 3.0 0.000 0.000 315.640 315.640 ls_scf_main 1 4.0 0.002 0.002 300.900 300.900 ls_scf_dm_to_ks 11 5.0 0.000 0.000 147.013 147.013 density_matrix_trs4 11 5.0 0.012 0.012 146.452 146.452 matrix_ls_to_qs 11 6.0 0.000 0.000 142.157 142.157 dbcsr_multiply_generic 185 6.1 0.491 0.491 99.407 99.407 dbcsr_copy_into_existing 11 7.0 86.704 86.704 86.704 86.704 dbcsr_complete_redistribute 23 7.5 43.997 43.997 60.656 60.656 multiply_cannon 185 7.1 0.355 0.355 59.908 59.908 matrix_decluster 11 7.0 0.000 0.000 55.451 55.451 multiply_cannon_loop 185 8.1 0.407 0.407 40.428 40.428 multiply_cannon_multrec 185 9.1 38.064 38.064 38.116 38.116 make_m2s 370 7.1 0.031 0.031 33.059 33.059 make_images 370 8.1 7.712 7.712 30.534 30.534 arnoldi_extremal 12 6.1 0.000 0.000 24.692 24.692 arnoldi_normal_ev 12 7.1 0.027 0.027 24.692 24.692 build_subspace 23 8.1 0.138 0.138 24.059 24.059 dbcsr_matrix_vector_mult 652 9.0 0.220 0.220 23.054 23.054 dbcsr_finalize 646 7.5 0.224 0.224 22.192 22.192 dbcsr_matrix_vector_mult_local 652 10.0 21.739 21.739 21.760 21.760 dbcsr_merge_all 597 8.5 3.639 3.639 20.314 20.314 setup_rec_index_2d 370 8.1 18.964 18.964 18.964 18.964 dbcsr_sort_indices 1103 9.9 18.553 18.553 18.553 18.553 quick_finalize 395 10.0 0.549 0.549 15.913 15.913 dbcsr_special_finalize 370 9.1 0.003 0.003 14.648 14.648 tree_to_linear_d 110 9.4 14.242 14.242 14.242 14.242 ls_scf_init_scf 1 4.0 0.000 0.000 13.789 13.789 ls_scf_init_matrix_S 1 5.0 0.000 0.000 13.311 13.311 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 12.385 12.385 dbcsr_dot_sd 144 6.3 9.587 9.587 9.588 9.588 dbcsr_frobenius_norm 142 6.1 8.577 8.577 8.579 8.579 dbcsr_new_transposed 2 7.0 0.142 0.142 8.183 8.183 dbcsr_redistribute 2 8.0 7.928 7.928 8.002 8.002 make_images_data 370 9.1 0.011 0.011 7.972 7.972 matrix_qs_to_ls 12 5.1 0.000 0.000 7.746 7.746 matrix_cluster 12 6.1 0.000 0.000 7.746 7.746 hybrid_alltoall_any 393 9.9 5.883 5.883 6.732 6.732 ------------------------------------------------------------------------------- From /workspace/artifacts/bench_dftb_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.012 0.014 107.007 107.008 qs_energies 1 2.0 0.000 0.000 106.901 106.902 ls_scf 1 3.0 0.000 0.000 106.791 106.792 ls_scf_main 1 4.0 0.001 0.003 102.580 102.581 density_matrix_trs4 11 5.0 0.010 0.014 98.567 98.654 dbcsr_multiply_generic 185 6.1 0.079 0.093 92.258 92.599 multiply_cannon 185 7.1 0.052 0.057 77.616 78.899 multiply_cannon_loop 185 8.1 0.255 0.271 73.323 75.587 multiply_cannon_multrec 1480 9.1 48.757 51.247 49.311 51.786 mp_waitall_1 11936 10.3 21.527 24.779 21.527 24.779 multiply_cannon_metrocomm3 1480 9.1 0.022 0.025 12.760 17.586 make_m2s 370 7.1 0.037 0.041 10.137 10.243 make_images 370 8.1 0.740 0.759 10.007 10.121 multiply_cannon_metrocomm1 1480 9.1 0.012 0.014 5.121 7.024 calculate_norms 2960 9.1 5.791 5.983 5.791 5.983 arnoldi_extremal 12 6.1 0.001 0.001 4.589 4.597 arnoldi_normal_ev 12 7.1 0.002 0.008 4.588 4.597 make_images_data 370 9.1 0.015 0.016 4.094 4.502 build_subspace 23 8.1 0.044 0.059 4.435 4.439 mp_sum_l 1039 5.9 3.035 4.415 3.035 4.415 dbcsr_matrix_vector_mult 652 9.0 0.020 0.085 3.652 3.749 hybrid_alltoall_any 393 9.9 0.361 1.855 3.389 3.587 ls_scf_dm_to_ks 11 5.0 0.000 0.000 3.471 3.544 ls_scf_init_scf 1 4.0 0.000 0.000 3.226 3.228 ls_scf_init_matrix_S 1 5.0 0.000 0.000 3.190 3.198 dbcsr_multiply_generic_mpsum_f 137 7.1 0.001 0.001 2.014 3.182 dbcsr_complete_redistribute 23 7.5 1.911 2.035 3.068 3.179 matrix_ls_to_qs 11 6.0 0.000 0.000 3.018 3.123 make_images_pack 370 9.1 2.758 2.965 2.765 2.972 matrix_sqrt_Newton_Schulz 1 6.0 0.001 0.001 2.914 2.917 matrix_decluster 11 7.0 0.000 0.000 2.763 2.868 dbcsr_matrix_vector_mult_local 652 10.0 2.682 2.778 2.687 2.784 buffer_matrices_ensure_size 370 8.1 2.513 2.645 2.513 2.645 dbcsr_add_d 280 6.0 0.002 0.004 2.425 2.542 dbcsr_add_anytype 280 7.0 1.342 1.440 2.423 2.540 dbcsr_finalize 646 7.5 0.016 0.018 2.150 2.245 ------------------------------------------------------------------------------- Plot: name="bench_dftb_timings_32omp", title="Timings of bench_dftb with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32omp", name="rest", label="rest", y=108.31300000000002, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=86.704, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=43.997, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=38.064, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=21.739, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="setup_rec_index_2d", label="setup_rec_index_2d", y=18.964, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="make_images_pack", label="make_images_pack", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32omp", name="calculate_norms", label="calculate_norms", y=0.0, yerr=0.0 Plot: name="bench_dftb_timings_32mpi", title="Timings of bench_dftb with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="bench_dftb_timings_32mpi", name="rest", label="rest", y=20.546000000000006, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_copy_into_existing", label="dbcsr_copy_into_existing", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_complete_redistribute", label="dbcsr_complete_redistribute", y=1.911, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=48.757, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="dbcsr_matrix_vector_mult_local", label="dbcsr_matrix_vector_mult_local", y=2.682, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="setup_rec_index_2d", label="setup_rec_index_2d", y=0.0, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=21.527, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="make_images_pack", label="make_images_pack", y=2.758, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=3.035, yerr=0.0 PlotPoint: plot="bench_dftb_timings_32mpi", name="calculate_norms", label="calculate_norms", y=5.791, yerr=0.0 Running dbcsr.inp with 1 threads and 32 ranks... done. Running dbcsr.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/dbcsr_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.006 0.006 121.473 121.473 lib_test 1 2.0 0.000 0.000 121.466 121.466 dbcsr_run_tests 3 3.0 0.003 0.003 121.465 121.465 test_multiplies_multiproc 3 4.0 0.001 0.001 99.869 99.869 dbcsr_redistribute 9 5.0 67.977 67.977 71.785 71.785 dbcsr_multiply_generic 9 5.0 0.001 0.001 25.871 25.871 dbcsr_make_random_matrix 9 4.0 15.888 15.888 21.508 21.508 multiply_cannon 9 6.0 0.003 0.003 18.710 18.710 multiply_cannon_loop 9 7.0 0.005 0.005 18.138 18.138 multiply_cannon_multrec 9 8.0 18.131 18.131 18.132 18.132 dbcsr_finalize 27 5.7 0.004 0.004 9.706 9.706 dbcsr_merge_all 18 6.5 3.484 3.484 8.929 8.929 mp_alltoall_d11v 27 6.0 3.471 3.471 3.471 3.471 tree_to_linear_d 9 7.0 3.425 3.425 3.425 3.425 dbcsr_data_release 975 7.6 2.549 2.549 2.549 2.549 ------------------------------------------------------------------------------- From /workspace/artifacts/dbcsr_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.003 0.005 30.278 30.279 lib_test 1 2.0 0.000 0.001 30.243 30.268 dbcsr_run_tests 3 3.0 0.001 0.001 30.241 30.266 test_multiplies_multiproc 3 4.0 0.001 0.001 29.057 29.148 dbcsr_multiply_generic 9 5.0 0.002 0.002 26.998 27.090 multiply_cannon 9 6.0 0.003 0.004 24.414 24.862 multiply_cannon_loop 9 7.0 0.004 0.005 23.931 24.371 multiply_cannon_multrec 72 8.0 20.183 20.824 20.184 20.825 mp_waitall_1 576 9.2 4.210 5.068 4.210 5.068 multiply_cannon_metrocomm1 72 8.0 0.002 0.003 3.288 4.164 mp_sum_l 310 2.7 0.560 1.453 0.560 1.453 dbcsr_multiply_generic_mpsum_f 9 6.0 0.000 0.000 0.554 1.447 multiply_cannon_metrocomm3 72 8.0 0.000 0.001 0.445 1.261 dbcsr_make_random_matrix 9 4.0 0.888 0.912 1.135 1.173 make_m2s 18 6.0 0.001 0.001 1.079 1.146 make_images 18 7.0 0.027 0.028 1.076 1.143 dbcsr_finalize 27 5.7 0.001 0.001 1.021 1.123 dbcsr_merge_all 18 6.5 0.167 0.188 0.887 0.962 dbcsr_data_release 444 7.6 0.723 0.841 0.723 0.841 dbcsr_redistribute 9 5.0 0.429 0.480 0.754 0.790 dbcsr_destroy 111 5.9 0.010 0.061 0.601 0.692 make_images_data 18 8.0 0.001 0.001 0.535 0.635 ------------------------------------------------------------------------------- Plot: name="dbcsr_timings_32omp", title="Timings of dbcsr with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32omp", name="rest", label="rest", y=9.972999999999985, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_redistribute", label="dbcsr_redistribute", y=67.977, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=18.131, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=15.888, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_merge_all", label="dbcsr_merge_all", y=3.484, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=3.471, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="dbcsr_data_release", label="dbcsr_data_release", y=2.549, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_waitall_1", label="mp_waitall_1", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32omp", name="mp_sum_l", label="mp_sum_l", y=0.0, yerr=0.0 Plot: name="dbcsr_timings_32mpi", title="Timings of dbcsr with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="dbcsr_timings_32mpi", name="rest", label="rest", y=3.1179999999999986, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_redistribute", label="dbcsr_redistribute", y=0.429, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="multiply_cannon_multrec", label="multiply_cannon_multrec", y=20.183, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_make_random_matrix", label="dbcsr_make_random_matrix", y=0.888, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_merge_all", label="dbcsr_merge_all", y=0.167, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_alltoall_d11v", label="mp_alltoall_d11v", y=0.0, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="dbcsr_data_release", label="dbcsr_data_release", y=0.723, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_waitall_1", label="mp_waitall_1", y=4.21, yerr=0.0 PlotPoint: plot="dbcsr_timings_32mpi", name="mp_sum_l", label="mp_sum_l", y=0.56, yerr=0.0 Running MQAE_single_node.inp with 1 threads and 32 ranks... done. Running MQAE_single_node.inp with 32 threads and 1 ranks... done. From /workspace/artifacts/MQAE_single_node_32omp.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.046 0.046 160.496 160.496 qs_mol_dyn_low 1 2.0 0.005 0.005 158.443 158.443 velocity_verlet 5 3.0 0.005 0.005 128.803 128.803 qmmm_el_coupling 6 3.8 0.000 0.000 79.852 79.852 qmmm_elec_with_gaussian 6 4.8 0.189 0.189 79.844 79.844 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 78.094 78.094 qmmm_elec_gaussian_low_G 6 6.8 76.540 76.540 76.540 76.540 qs_forces 6 3.8 0.001 0.001 58.281 58.281 qs_energies 6 4.8 0.001 0.001 51.677 51.677 scf_env_do_scf 6 5.8 0.001 0.001 47.897 47.897 scf_env_do_scf_inner_loop 39 6.8 0.003 0.003 41.621 41.621 rebuild_ks_matrix 45 8.4 0.000 0.000 41.095 41.095 qs_ks_build_kohn_sham_matrix 45 9.4 0.008 0.008 41.095 41.095 qs_ks_update_qs_env 45 7.8 0.000 0.000 35.188 35.188 pw_transfer 966 11.9 0.077 0.077 25.404 25.404 fft_wrap_pw1pw2 801 13.0 0.009 0.009 25.023 25.023 fft_wrap_pw1pw2_150 507 14.3 2.572 2.572 24.448 24.448 qs_vxc_create 45 10.4 0.001 0.001 22.078 22.078 xc_vxc_pw_create 45 11.4 3.870 3.870 22.077 22.077 qs_rho_update_rho 45 7.9 0.000 0.000 11.065 11.065 calculate_rho_elec 45 8.9 0.920 0.920 11.064 11.064 pw_scatter_s 429 15.4 11.041 11.041 11.041 11.041 fist_calc_energy_force 6 3.8 0.002 0.002 10.439 10.439 xc_rho_set_and_dset_create 45 12.4 0.258 0.258 10.418 10.418 fft3d_s 802 15.0 9.878 9.878 9.888 9.888 pw_integral_ab 2539 7.4 9.229 9.229 9.229 9.229 force_nonbond 6 4.8 9.058 9.058 9.058 9.058 qmmm_forces 6 3.8 0.001 0.001 9.019 9.019 qmmm_forces_with_gaussian 6 4.8 0.162 0.162 8.502 8.502 qs_ks_ddapc 45 10.4 0.001 0.001 7.204 7.204 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 6.422 6.422 init_scf_loop 6 6.8 0.000 0.000 6.270 6.270 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 5.920 5.920 pw_poisson_solve 51 9.9 2.469 2.469 5.698 5.698 qmmm_forces_gaussian_low_G 6 6.8 5.351 5.351 5.351 5.351 grid_collocate_task_list 45 9.9 5.118 5.118 5.118 5.118 density_rs2pw 45 9.9 0.003 0.003 5.027 5.027 sum_up_and_integrate 45 10.4 0.234 0.234 4.638 4.638 cp_ddapc_apply_CD 45 11.4 0.006 0.006 4.472 4.472 integrate_v_rspace 45 11.4 0.011 0.011 4.404 4.404 ------------------------------------------------------------------------------- From /workspace/artifacts/MQAE_single_node_32mpi.out: ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.036 0.039 98.772 98.773 qs_mol_dyn_low 1 2.0 0.006 0.006 97.116 97.215 qs_forces 6 3.8 0.001 0.001 73.012 73.012 qs_energies 6 4.8 0.001 0.001 69.646 69.647 scf_env_do_scf 6 5.8 0.000 0.001 67.935 67.935 scf_env_do_scf_inner_loop 113 6.2 0.003 0.010 65.238 65.239 rebuild_ks_matrix 119 8.1 0.000 0.001 48.535 48.553 qs_ks_build_kohn_sham_matrix 119 9.1 0.023 0.025 48.534 48.553 qs_ks_update_qs_env 119 7.3 0.001 0.001 45.639 45.657 velocity_verlet 5 3.0 0.002 0.003 39.729 39.733 pw_transfer 2446 11.8 0.308 0.321 32.016 32.133 fft_wrap_pw1pw2 2059 12.8 0.036 0.040 31.054 31.202 fft_wrap_pw1pw2_150 1321 14.0 2.646 2.850 30.062 30.218 qs_vxc_create 119 10.1 0.004 0.005 25.030 25.037 xc_vxc_pw_create 119 11.1 0.528 0.705 25.025 25.032 fft3d_ps 2059 14.8 14.017 15.241 23.455 23.690 qs_rho_update_rho 119 7.3 0.001 0.001 18.776 18.779 calculate_rho_elec 119 8.3 0.088 0.097 18.775 18.778 sum_up_and_integrate 119 10.1 0.098 0.107 17.086 17.156 integrate_v_rspace 119 11.1 0.005 0.006 16.987 17.063 rs_pw_transfer 988 11.5 0.017 0.019 13.372 13.985 qmmm_forces 6 3.8 0.003 0.003 13.372 13.372 qmmm_forces_with_gaussian 6 4.8 0.468 0.558 12.962 13.082 density_rs2pw 119 9.3 0.011 0.013 11.785 12.319 xc_rho_set_and_dset_create 119 12.1 0.542 0.643 11.584 11.971 potential_pw2rs 119 12.1 0.011 0.013 10.539 10.554 qmmm_el_coupling 6 3.8 0.000 0.000 9.518 9.563 qmmm_elec_with_gaussian 6 4.8 0.429 0.584 9.514 9.560 mp_alltoall_z22v 2059 16.8 5.805 7.640 5.805 7.640 grid_collocate_task_list 119 9.3 6.644 7.036 6.644 7.036 grid_integrate_task_list 119 12.1 5.962 6.261 5.962 6.261 qmmm_force_with_gaussian_low 6 5.8 0.000 0.000 5.882 6.062 rs_pw_transfer_PW2RS_150 125 13.9 3.013 3.092 5.831 5.879 yz_to_x 964 15.3 1.397 1.561 4.273 5.546 rs_pw_transfer_RS2PW_150 125 11.2 2.360 2.505 4.810 5.414 x_to_yz 1095 16.3 2.181 2.427 5.111 5.413 pw_restrict_s3 18 5.8 2.395 2.424 5.246 5.330 mp_waitany 4028 12.8 4.194 5.171 4.194 5.171 qmmm_forces_gaussian_low_G 6 6.8 4.826 5.012 4.826 5.012 qmmm_elec_with_gaussian:spline 6 5.8 0.000 0.000 4.206 4.248 pw_prolongate_s3 18 6.8 1.914 1.974 4.206 4.247 qs_scf_new_mos 113 7.2 0.001 0.001 4.156 4.166 qs_scf_loop_do_ot 113 8.2 0.001 0.001 4.155 4.165 pw_integral_ab 2761 7.7 3.545 3.583 3.971 4.137 ot_scf_mini 113 9.2 0.002 0.002 3.974 3.983 dbcsr_multiply_generic 2588 12.3 0.102 0.118 3.727 3.854 qs_ks_ddapc 119 10.1 0.003 0.003 3.353 3.496 qmmm_elec_with_gaussian_low 6 5.8 0.000 0.000 3.403 3.488 qs_ks_update_qs_env_forces 6 4.8 0.000 0.000 2.910 2.911 mp_sum_dm3 33 5.7 2.602 2.725 2.602 2.725 init_scf_loop 6 6.8 0.000 0.000 2.693 2.693 pw_gather_p 964 14.3 2.472 2.625 2.472 2.625 mp_waitall_1 188862 16.2 2.388 2.562 2.388 2.562 ot_mini 113 10.2 0.001 0.001 2.507 2.520 qmmm_elec_gaussian_low_G 6 6.8 2.446 2.517 2.446 2.517 pw_scatter_p 1095 15.3 2.383 2.493 2.383 2.493 pw_derive 732 12.5 1.962 2.092 1.962 2.092 qs_ot_get_derivative 113 11.2 0.001 0.001 1.981 1.990 ------------------------------------------------------------------------------- Plot: name="MQAE_single_node_timings_32omp", title="Timings of MQAE_single_node with 32 OpenMP Threads", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32omp", name="rest", label="rest", y=34.281000000000006, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=76.54, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_scatter_s", label="pw_scatter_s", y=11.041, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_s", label="fft3d_s", y=9.878, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="pw_integral_ab", label="pw_integral_ab", y=9.229, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="force_nonbond", label="force_nonbond", y=9.058, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=5.351, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_collocate_task_list", label="grid_collocate_task_list", y=5.118, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="grid_integrate_task_list", label="grid_integrate_task_list", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="fft3d_ps", label="fft3d_ps", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32omp", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=0.0, yerr=0.0 Plot: name="MQAE_single_node_timings_32mpi", title="Timings of MQAE_single_node with 32 MPI Ranks", ylabel="time [s]" PlotPoint: plot="MQAE_single_node_timings_32mpi", name="rest", label="rest", y=55.52700000000001, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_elec_gaussian_low_G", label="qmmm_elec_gaussian_low_G", y=2.446, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_scatter_s", label="pw_scatter_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_s", label="fft3d_s", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="pw_integral_ab", label="pw_integral_ab", y=3.545, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="force_nonbond", label="force_nonbond", y=0.0, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="qmmm_forces_gaussian_low_G", label="qmmm_forces_gaussian_low_G", y=4.826, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_collocate_task_list", label="grid_collocate_task_list", y=6.644, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="grid_integrate_task_list", label="grid_integrate_task_list", y=5.962, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="fft3d_ps", label="fft3d_ps", y=14.017, yerr=0.0 PlotPoint: plot="MQAE_single_node_timings_32mpi", name="mp_alltoall_z22v", label="mp_alltoall_z22v", y=5.805, yerr=0.0 Summary: Performance test works fine. Status: OK Uploading artifacts... done EndDate: 2021-10-08 21:10:37+00:00