=== This is the CP2K Performance-Test === Already up to date. Current branch master is up to date. Already up to date. Current branch master is up to date. GIT Revision: cebb65e3a93fe490e8f7435957a27a406ba398e1 ################# ARCHITECTURE FILE ################## #!/bin/bash # # CP2K arch file for Cray-XC40 (Piz Daint, CSCS, multi-core partition) # # Tested with: GNU 11.2.0, Cray-MPICH 7.7.18, # Cray-libsci 20.09.1, Cray-FFTW 3.3.8.10, # COSMA 2.6.6, ELPA 2023.05.001, HDF5 1.14.2, # LIBINT 2.6.0, LIBPEXSI 1.2.0, LIBXC 6.2.2, # LIBVORI 220621, LIBXSMM 1.17, PLUMED 2.9.0, # SIRIUS 7.5.2, SPGLIB 1.16.2, LIBGRPP 20231215, # SPFFT 1.0.6, SPLA 1.5.5 # # Usage: Source this arch file and then run make as instructed. # A full toolchain installation is performed as default. # Replace or adapt the "module add" commands below if needed. # # Last update: 12.03.2024 # # \ if [ "${0}" = "${BASH_SOURCE}" ]; then \ echo "ERROR: Script ${0##*/} must be sourced"; \ echo "Usage: source ${0##*/}"; \ exit 1; \ fi; \ this_file=${BASH_SOURCE##*/}; \ if [ -n "${1}" ]; then \ gcc_version="${1}"; \ else \ gcc_version="11.2.0"; \ fi; \ module add daint-mc; \ module rm PrgEnv-cray; \ module add PrgEnv-gnu; \ module rm gcc; \ module add gcc/${gcc_version}; \ module add cray-fftw/3.3.8.10; \ echo "Expected setup:"; \ echo " cray-mpich/7.7.18"; \ echo " craype-broadwell"; \ echo " daint-mc/21.09"; \ echo " craype/2.7.10"; \ echo " cray-libsci/20.09.1"; \ echo " PrgEnv-gnu/6.0.10"; \ echo " gcc/${gcc_version}"; \ echo " cray-fftw/3.3.8.10"; \ module list; \ module -f save cp2k_mc_gnu_psmp; \ echo "To load the required modules in your batch job script, use:"; \ echo " module restore cp2k_mc_gnu_psmp"; \ cd tools/toolchain; \ ./install_cp2k_toolchain.sh -j${maxtasks} --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed; \ cd ../..; \ printf "Sourcing ${PWD}/tools/toolchain/install/setup ... "; \ source ${PWD}/tools/toolchain/install/setup; \ printf "done\n"; \ echo "Check the output above for error messages and consistency!"; \ echo; \ echo "If everything is OK, you can build a CP2K production binary with"; \ echo " make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \ echo; \ echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \ echo " make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \ echo "or build CP2K as a library with"; \ echo " make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \ echo; \ return # Set options DO_CHECKS := no USE_COSMA := 2.6.6 USE_ELPA := 2023.05.001 USE_HDF5 := 1.14.2 USE_LIBGRPP := 20231215 USE_LIBINT := 2.6.0 USE_LIBPEXSI := 1.2.0 USE_LIBVORI := 220621 USE_LIBXC := 6.2.2 USE_LIBXSMM := 1.17 USE_PLUMED := 2.9.0 USE_SPFFT := 1.0.6 USE_SPLA := 1.5.5 #USE_QUIP := 0.9.10 USE_SIRIUS := 7.5.2 USE_SPGLIB := 1.16.2 # Only needed for SIRIUS LIBVDWXC_VER := 0.4.0 # Only needed for LIBPEXSI SCOTCH_VER := 6.0.0 SUPERLU_VER := 6.1.0 LMAX := 5 MAX_CONTR := 4 CC := cc FC := ftn LD := ftn AR := ar -r # cc, CC, and ftn include already the proper -march flag CFLAGS := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g DFLAGS := -D__parallel DFLAGS += -D__SCALAPACK DFLAGS += -D__FFTW3 DFLAGS += -D__MAX_CONTR=$(strip $(MAX_CONTR)) INSTALL_PATH := $(PWD)/tools/toolchain/install ifeq ($(DO_CHECKS), yes) DFLAGS += -D__CHECK_DIAG endif ifneq ($(USE_PLUMED),) USE_PLUMED := $(strip $(USE_PLUMED)) PLUMED_LIB := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib DFLAGS += -D__PLUMED2 USE_GSL := 2.7 LIBS += $(PLUMED_LIB)/libplumed.a endif ifneq ($(USE_ELPA),) USE_ELPA := $(strip $(USE_ELPA)) TARGET := cpu ELPA_INC := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA) ELPA_LIB := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib CFLAGS += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules DFLAGS += -D__ELPA LIBS += $(ELPA_LIB)/libelpa.a endif ifneq ($(USE_QUIP),) USE_QUIP := $(strip $(USE_QUIP)) QUIP_INC := $(INSTALL_PATH)/quip-$(USE_QUIP)/include QUIP_LIB := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib CFLAGS += -I$(QUIP_INC) DFLAGS += -D__QUIP LIBS += $(QUIP_LIB)/libquip_core.a LIBS += $(QUIP_LIB)/libatoms.a LIBS += $(QUIP_LIB)/libFoX_sax.a LIBS += $(QUIP_LIB)/libFoX_common.a LIBS += $(QUIP_LIB)/libFoX_utils.a LIBS += $(QUIP_LIB)/libFoX_fsys.a endif ifneq ($(USE_LIBPEXSI),) USE_LIBPEXSI := $(strip $(USE_LIBPEXSI)) SCOTCH_VER := $(strip $(SCOTCH_VER)) SUPERLU_VER := $(strip $(SUPERLU_VER)) LIBPEXSI_INC := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include LIBPEXSI_LIB := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib SCOTCH_INC := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include SCOTCH_LIB := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib SUPERLU_INC := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include SUPERLU_LIB := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib CFLAGS += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC) DFLAGS += -D__LIBPEXSI LIBS += $(LIBPEXSI_LIB)/libpexsi.a LIBS += $(SUPERLU_LIB)/libsuperlu_dist.a LIBS += $(SCOTCH_LIB)/libptscotchparmetis.a LIBS += $(SCOTCH_LIB)/libptscotch.a LIBS += $(SCOTCH_LIB)/libptscotcherr.a LIBS += $(SCOTCH_LIB)/libscotchmetis.a LIBS += $(SCOTCH_LIB)/libscotch.a endif ifneq ($(USE_LIBVORI),) USE_LIBVORI := $(strip $(USE_LIBVORI)) LIBVORI_LIB := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib DFLAGS += -D__LIBVORI LIBS += $(LIBVORI_LIB)/libvori.a endif ifneq ($(USE_LIBXC),) USE_LIBXC := $(strip $(USE_LIBXC)) LIBXC_INC := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include LIBXC_LIB := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib CFLAGS += -I$(LIBXC_INC) DFLAGS += -D__LIBXC LIBS += $(LIBXC_LIB)/libxcf03.a LIBS += $(LIBXC_LIB)/libxc.a endif ifneq ($(USE_LIBGRPP),) USE_LIBGRPP := $(strip $(USE_LIBGRPP)) LIBGRPP_INC := $(INSTALL_PATH)/libgrpp-main-$(USE_LIBGRPP)/include LIBGRPP_LIB := $(INSTALL_PATH)/libgrpp-main-$(USE_LIBGRPP)/lib CFLAGS += -I$(LIBGRPP_INC) DFLAGS += -D__LIBGRPP LIBS += $(LIBGRPP_LIB)/liblibgrpp.a endif ifneq ($(USE_LIBINT),) USE_LIBINT := $(strip $(USE_LIBINT)) LMAX := $(strip $(LMAX)) LIBINT_INC := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include LIBINT_LIB := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib CFLAGS += -I$(LIBINT_INC) DFLAGS += -D__LIBINT LIBS += $(LIBINT_LIB)/libint2.a endif ifneq ($(USE_SPGLIB),) USE_SPGLIB := $(strip $(USE_SPGLIB)) SPGLIB_INC := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include SPGLIB_LIB := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib CFLAGS += -I$(SPGLIB_INC) DFLAGS += -D__SPGLIB LIBS += $(SPGLIB_LIB)/libsymspg.a endif ifneq ($(USE_LIBXSMM),) USE_LIBXSMM := $(strip $(USE_LIBXSMM)) LIBXSMM_INC := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include LIBXSMM_LIB := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib CFLAGS += -I$(LIBXSMM_INC) DFLAGS += -D__LIBXSMM LIBS += $(LIBXSMM_LIB)/libxsmmf.a LIBS += $(LIBXSMM_LIB)/libxsmm.a endif ifneq ($(USE_SIRIUS),) USE_SIRIUS := $(strip $(USE_SIRIUS)) LIBVDWXC_VER := $(strip $(LIBVDWXC_VER)) LIBVDWXC_INC := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include LIBVDWXC_LIB := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib SIRIUS_INC := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include SIRIUS_LIB := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib CFLAGS += -I$(LIBVDWXC_INC) CFLAGS += -I$(SIRIUS_INC) DFLAGS += -D__LIBVDWXC DFLAGS += -D__SIRIUS LIBS += $(SIRIUS_LIB)/libsirius.a LIBS += $(LIBVDWXC_LIB)/libvdwxc.a endif ifneq ($(USE_SPFFT),) USE_SPFFT := $(strip $(USE_SPFFT)) SPFFT_INC := $(INSTALL_PATH)/SpFFT-$(USE_SPFFT)/include SPFFT_LIB := $(INSTALL_PATH)/SpFFT-$(USE_SPFFT)/lib CFLAGS += -I$(SPFFT_INC) DFLAGS += -D__SPFFT LIBS += $(SPFFT_LIB)/libspfft.a endif ifneq ($(USE_SPLA),) USE_SPLA := $(strip $(USE_SPLA)) SPLA_INC := $(INSTALL_PATH)/SpLA-$(USE_SPLA)/include/spla SPLA_LIB := $(INSTALL_PATH)/SpLA-$(USE_SPLA)/lib CFLAGS += -I$(SPLA_INC) DFLAGS += -D__SPLA LIBS += $(SPLA_LIB)/libspla.a endif ifneq ($(USE_HDF5),) USE_HDF5 := $(strip $(USE_HDF5)) HDF5_INC := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/include HDF5_LIB := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/lib CFLAGS += -I$(HDF5_INC) DFLAGS += -D__HDF5 LIBS += $(HDF5_LIB)/libhdf5_fortran.a LIBS += $(HDF5_LIB)/libhdf5_hl.a LIBS += $(HDF5_LIB)/libhdf5.a endif ifneq ($(USE_COSMA),) USE_COSMA := $(strip $(USE_COSMA)) COSMA_INC := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include COSMA_LIB := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib CFLAGS += -I$(COSMA_INC) DFLAGS += -D__COSMA LIBS += $(COSMA_LIB)/libcosma_prefixed_pxgemm.a LIBS += $(COSMA_LIB)/libcosma.a LIBS += $(COSMA_LIB)/libcosta.a endif ifneq ($(USE_GSL),) USE_GSL := $(strip $(USE_GSL)) GSL_INC := $(INSTALL_PATH)/gsl-$(USE_GSL)/include GSL_LIB := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib CFLAGS += -I$(GSL_INC) DFLAGS += -D__GSL LIBS += $(GSL_LIB)/libgsl.a endif CFLAGS += $(DFLAGS) FCFLAGS := $(CFLAGS) ifeq ($(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes), yes) FCFLAGS += -fallow-argument-mismatch endif FCFLAGS += -fbacktrace FCFLAGS += -ffree-form FCFLAGS += -ffree-line-length-none FCFLAGS += -fno-omit-frame-pointer FCFLAGS += -std=f2008 LDFLAGS := $(FCFLAGS) -static LIBS += -lz -ldl -lstdc++ # End ############### END ARCHITECTURE FILE ################ ===== TESTS (description) ===== ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-32 RI-RPA/RI-MP2 correlation energy input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp'] output file: result.log # nodes = 4 # ranks/node = 4 # threads/rank = 9 nrepeat = 1 time[min] = 15 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/01 job id: 52197187 --- Point --- name: 10 plot: h2o_32_ri_rpa_mp2 regex: Total RI-RPA Time= label: RI-RPA (4n/4r/9t) --- Point --- name: 11 plot: h2o_32_ri_rpa_mp2_mem regex: Estimated peak process memory label: RI-RPA (4n/4r/9t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-32 RI-RPA/RI-MP2 correlation energy input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp'] output file: result.log # nodes = 4 # ranks/node = 12 # threads/rank = 3 nrepeat = 1 time[min] = 15 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/02 job id: 52197188 --- Point --- name: 20 plot: h2o_32_ri_rpa_mp2 regex: Total MP2 Time= label: RI-MP2 (4n/12r/3t) --- Point --- name: 21 plot: h2o_32_ri_rpa_mp2_mem regex: Estimated peak process memory label: RI-MP2 (4n/12r/3t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-64 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-64.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 36 # threads/rank = 1 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/03 job id: 52197189 --- Point --- name: 100 plot: h2o_64_md regex: CP2K label: (4n/36r/1t) --- Point --- name: 101 plot: h2o_64_md_mem regex: Estimated peak process memory label: (4n/36r/1t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-64 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-64.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 18 # threads/rank = 2 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/04 job id: 52197190 --- Point --- name: 102 plot: h2o_64_md regex: CP2K label: (4n/18r/2t) --- Point --- name: 103 plot: h2o_64_md_mem regex: Estimated peak process memory label: (4n/18r/2t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-64 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-64.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 12 # threads/rank = 3 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/05 job id: 52197191 --- Point --- name: 104 plot: h2o_64_md regex: CP2K label: (4n/12r/3t) --- Point --- name: 105 plot: h2o_64_md_mem regex: Estimated peak process memory label: (4n/12r/3t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-64 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-64.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 9 # threads/rank = 4 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/06 job id: 52197192 --- Point --- name: 106 plot: h2o_64_md regex: CP2K label: (4n/9r/4t) --- Point --- name: 107 plot: h2o_64_md_mem regex: Estimated peak process memory label: (4n/9r/4t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-64 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-64.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 6 # threads/rank = 6 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/07 job id: 52197193 --- Point --- name: 108 plot: h2o_64_md regex: CP2K label: (4n/6r/6t) --- Point --- name: 109 plot: h2o_64_md_mem regex: Estimated peak process memory label: (4n/6r/6t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-64 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-64.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 4 # threads/rank = 9 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/08 job id: 52197194 --- Point --- name: 110 plot: h2o_64_md regex: CP2K label: (4n/4r/9t) --- Point --- name: 111 plot: h2o_64_md_mem regex: Estimated peak process memory label: (4n/4r/9t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-64 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-64.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 3 # threads/rank = 12 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/09 job id: 52197196 --- Point --- name: 112 plot: h2o_64_md regex: CP2K label: (4n/3r/12t) --- Point --- name: 113 plot: h2o_64_md_mem regex: Estimated peak process memory label: (4n/3r/12t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-64 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-64.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 2 # threads/rank = 18 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/10 job id: 52197197 --- Point --- name: 114 plot: h2o_64_md regex: CP2K label: (4n/2r/18t) --- Point --- name: 115 plot: h2o_64_md_mem regex: Estimated peak process memory label: (4n/2r/18t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-64 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-64.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 1 # threads/rank = 36 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/11 job id: 52197198 --- Point --- name: 116 plot: h2o_64_md regex: CP2K label: (4n/1r/36t) --- Point --- name: 117 plot: h2o_64_md_mem regex: Estimated peak process memory label: (4n/1r/36t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-128 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-128.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 36 # threads/rank = 1 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/12 job id: 52197199 --- Point --- name: 200 plot: h2o_128_md regex: CP2K label: (4n/36r/1t) --- Point --- name: 201 plot: h2o_128_md_mem regex: Estimated peak process memory label: (4n/36r/1t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-128 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-128.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 18 # threads/rank = 2 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/13 job id: 52197200 --- Point --- name: 202 plot: h2o_128_md regex: CP2K label: (4n/18r/2t) --- Point --- name: 203 plot: h2o_128_md_mem regex: Estimated peak process memory label: (4n/18r/2t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-128 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-128.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 12 # threads/rank = 3 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/14 job id: 52197201 --- Point --- name: 204 plot: h2o_128_md regex: CP2K label: (4n/12r/3t) --- Point --- name: 205 plot: h2o_128_md_mem regex: Estimated peak process memory label: (4n/12r/3t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-128 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-128.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 9 # threads/rank = 4 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/15 job id: 52197202 --- Point --- name: 206 plot: h2o_128_md regex: CP2K label: (4n/9r/4t) --- Point --- name: 207 plot: h2o_128_md_mem regex: Estimated peak process memory label: (4n/9r/4t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-128 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-128.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 6 # threads/rank = 6 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/16 job id: 52197203 --- Point --- name: 208 plot: h2o_128_md regex: CP2K label: (4n/6r/6t) --- Point --- name: 209 plot: h2o_128_md_mem regex: Estimated peak process memory label: (4n/6r/6t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-128 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-128.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 4 # threads/rank = 9 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/17 job id: 52197204 --- Point --- name: 210 plot: h2o_128_md regex: CP2K label: (4n/4r/9t) --- Point --- name: 211 plot: h2o_128_md_mem regex: Estimated peak process memory label: (4n/4r/9t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-128 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-128.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 3 # threads/rank = 12 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/18 job id: 52197205 --- Point --- name: 212 plot: h2o_128_md regex: CP2K label: (4n/3r/12t) --- Point --- name: 213 plot: h2o_128_md_mem regex: Estimated peak process memory label: (4n/3r/12t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-128 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-128.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 2 # threads/rank = 18 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/19 job id: 52197206 --- Point --- name: 214 plot: h2o_128_md regex: CP2K label: (4n/2r/18t) --- Point --- name: 215 plot: h2o_128_md_mem regex: Estimated peak process memory label: (4n/2r/18t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-128 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-128.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 1 # threads/rank = 36 nrepeat = 1 time[min] = 10 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/20 job id: 52197207 --- Point --- name: 216 plot: h2o_128_md regex: CP2K label: (4n/1r/36t) --- Point --- name: 217 plot: h2o_128_md_mem regex: Estimated peak process memory label: (4n/1r/36t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-256 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-256.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 36 # threads/rank = 1 nrepeat = 1 time[min] = 30 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/21 job id: 52197208 --- Point --- name: 400 plot: h2o_256_md regex: CP2K label: (4n/36r/1t) --- Point --- name: 401 plot: h2o_256_md_mem regex: Estimated peak process memory label: (4n/36r/1t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-256 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-256.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 18 # threads/rank = 2 nrepeat = 1 time[min] = 30 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/22 job id: 52197209 --- Point --- name: 402 plot: h2o_256_md regex: CP2K label: (4n/18r/2t) --- Point --- name: 403 plot: h2o_256_md_mem regex: Estimated peak process memory label: (4n/18r/2t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-256 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-256.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 12 # threads/rank = 3 nrepeat = 1 time[min] = 30 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/23 job id: 52197210 --- Point --- name: 404 plot: h2o_256_md regex: CP2K label: (4n/12r/3t) --- Point --- name: 405 plot: h2o_256_md_mem regex: Estimated peak process memory label: (4n/12r/3t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-256 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-256.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 9 # threads/rank = 4 nrepeat = 1 time[min] = 30 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/24 job id: 52197211 --- Point --- name: 406 plot: h2o_256_md regex: CP2K label: (4n/9r/4t) --- Point --- name: 407 plot: h2o_256_md_mem regex: Estimated peak process memory label: (4n/9r/4t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-256 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-256.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 6 # threads/rank = 6 nrepeat = 1 time[min] = 30 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/25 job id: 52197212 --- Point --- name: 408 plot: h2o_256_md regex: CP2K label: (4n/6r/6t) --- Point --- name: 409 plot: h2o_256_md_mem regex: Estimated peak process memory label: (4n/6r/6t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-256 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-256.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 4 # threads/rank = 9 nrepeat = 1 time[min] = 30 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/26 job id: 52197213 --- Point --- name: 410 plot: h2o_256_md regex: CP2K label: (4n/4r/9t) --- Point --- name: 411 plot: h2o_256_md_mem regex: Estimated peak process memory label: (4n/4r/9t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-256 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-256.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 3 # threads/rank = 12 nrepeat = 1 time[min] = 30 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/27 job id: 52197214 --- Point --- name: 412 plot: h2o_256_md regex: CP2K label: (4n/3r/12t) --- Point --- name: 413 plot: h2o_256_md_mem regex: Estimated peak process memory label: (4n/3r/12t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-256 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-256.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 2 # threads/rank = 18 nrepeat = 1 time[min] = 30 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/28 job id: 52197215 --- Point --- name: 414 plot: h2o_256_md regex: CP2K label: (4n/2r/18t) --- Point --- name: 415 plot: h2o_256_md_mem regex: Estimated peak process memory label: (4n/2r/18t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-256 test - DBCSR dominated (MPI/OMP) input file: benchmarks/QS/H2O-256.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 1 # threads/rank = 36 nrepeat = 1 time[min] = 30 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/29 job id: 52197216 --- Point --- name: 416 plot: h2o_256_md regex: CP2K label: (4n/1r/36t) --- Point --- name: 417 plot: h2o_256_md_mem regex: Estimated peak process memory label: (4n/1r/36t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-32 (NREP 3) linear scaling test (864 H2O) input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 36 # threads/rank = 1 nrepeat = 1 time[min] = 15 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/30 job id: 52197217 --- Point --- name: 500 plot: h2o_32_nrep3_ls regex: CP2K label: (4n/36r/1t) --- Point --- name: 501 plot: h2o_32_nrep3_ls_mem regex: Estimated peak process memory label: (4n/36r/1t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-32 (NREP 3) linear scaling test (864 H2O) input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 18 # threads/rank = 2 nrepeat = 1 time[min] = 15 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/31 job id: 52197218 --- Point --- name: 502 plot: h2o_32_nrep3_ls regex: CP2K label: (4n/18r/2t) --- Point --- name: 503 plot: h2o_32_nrep3_ls_mem regex: Estimated peak process memory label: (4n/18r/2t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-32 (NREP 3) linear scaling test (864 H2O) input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 9 # threads/rank = 4 nrepeat = 1 time[min] = 15 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/32 job id: 52197219 --- Point --- name: 504 plot: h2o_32_nrep3_ls regex: CP2K label: (4n/9r/4t) --- Point --- name: 505 plot: h2o_32_nrep3_ls_mem regex: Estimated peak process memory label: (4n/9r/4t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-32 (NREP 3) linear scaling test (864 H2O) input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 6 # threads/rank = 6 nrepeat = 1 time[min] = 15 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/33 job id: 52197220 --- Point --- name: 506 plot: h2o_32_nrep3_ls regex: CP2K label: (4n/6r/6t) --- Point --- name: 507 plot: h2o_32_nrep3_ls_mem regex: Estimated peak process memory label: (4n/6r/6t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-32 (NREP 3) linear scaling test (864 H2O) input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 4 # threads/rank = 9 nrepeat = 1 time[min] = 15 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/34 job id: 52197221 --- Point --- name: 508 plot: h2o_32_nrep3_ls regex: CP2K label: (4n/4r/9t) --- Point --- name: 509 plot: h2o_32_nrep3_ls_mem regex: Estimated peak process memory label: (4n/4r/9t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: H2O-32 (NREP 3) linear scaling test (864 H2O) input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp required files: [] output file: result.log # nodes = 4 # ranks/node = 3 # threads/rank = 12 nrepeat = 1 time[min] = 15 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/35 job id: 52197222 --- Point --- name: 510 plot: h2o_32_nrep3_ls regex: CP2K label: (4n/3r/12t) --- Point --- name: 511 plot: h2o_32_nrep3_ls_mem regex: Estimated peak process memory label: (4n/3r/12t) ~~~~~~~ END TEST ~~~~~~~ ~~~~~~~~~ TEST ~~~~~~~~~ description: 512 H2O (4 NVE MD steps on 16 nodes) input file: benchmarks/QS/00512_H2O/H2O-512_md.inp required files: [] output file: result.log # nodes = 16 # ranks/node = 36 # threads/rank = 1 nrepeat = 1 time[min] = 20 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/36 job id: 52197224 --- Point --- name: 601 plot: h2o_512_md regex: CP2K label: (16n/36r/1t) --- Point --- name: 602 plot: h2o_512_md_mem regex: Estimated peak process memory label: (16n/36r/1t) ~~~~~~~ END TEST ~~~~~~~ === END TESTS (description) === ===== PLOTS (description) ===== ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_512_md", title="512 H2O (4 NVE MD steps on 16 nodes)", xlabel="Revision", ylabel="Time [s]" ~~~~~~~~~ PLOT ~~~~~~~~~ Plot: name="h2o_512_md_mem", title="512 H2O (4 NVE MD steps on 16 nodes)", xlabel="Revision", ylabel="Est. peak process memory [MiB]" === END PLOTS (description) === ============ RESULTS ============ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/01/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 0.000000E+00 0.0% 0.0% 0.0% flops max/rank 0.000000E+00 0.0% 0.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 0 0.0% 0.0% 0.0% number of processed stacks 0 0.0% 0.0% 0.0% average stack size 0.0 0.0 0.0 marketing flops 0.000000E+00 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 1 12. MP_Allreduce 19 21. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 15 172669. MP_Allreduce 424 8. MP_Sync 3 MP_comm_split 1 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.019 0.038 257.089 257.093 farming_run 1 2.0 256.333 256.334 256.976 257.041 ------------------------------------------------------------------------------- @@@@@@@@@@ Run number: 2 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 32 x 32 x 32 4194304 0.0% 100.0% 0.0% flops 29 x 32 x 32 7602176 0.0% 100.0% 0.0% flops 14 x 32 x 32 14221312 0.0% 100.0% 0.0% flops 28 x 32 x 32 27525120 0.0% 100.0% 0.0% flops 43 x 32 x 32 28180480 0.0% 100.0% 0.0% flops 86 x 32 x 32 28180480 0.0% 100.0% 0.0% flops 14 x 32 x 456 78446592 0.0% 100.0% 0.0% flops 57 x 32 x 32 102727680 0.0% 100.0% 0.0% flops 14 x 14 x 32 208732160 0.0% 100.0% 0.0% flops 29 x 14 x 32 212860928 0.0% 100.0% 0.0% flops 14 x 29 x 32 212860928 0.0% 100.0% 0.0% flops 29 x 29 x 32 227352576 0.0% 100.0% 0.0% flops 32 x 32 x 456 298844160 0.0% 100.0% 0.0% flops 28 x 32 x 456 313786368 0.0% 100.0% 0.0% flops 43 x 32 x 456 321257472 0.0% 100.0% 0.0% flops 86 x 32 x 456 321257472 0.0% 100.0% 0.0% flops 57 x 32 x 456 1171095552 0.0% 100.0% 0.0% flops 14 x 32 x 14 895979560448 0.0% 100.0% 0.0% flops 29 x 32 x 14 928073646080 0.0% 100.0% 0.0% flops 14 x 32 x 29 928073646080 0.0% 100.0% 0.0% flops 29 x 32 x 29 961219133440 0.0% 100.0% 0.0% flops 32 x 32 x 14 1693022420992 0.0% 100.0% 0.0% flops 32 x 32 x 29 1753487507456 0.0% 100.0% 0.0% flops inhomo. stacks 1804075008 100.0% 0.0% 0.0% flops total 7.165239E+12 0.0% 100.0% 0.0% flops max/rank 447.990765E+09 0.0% 100.0% 0.0% matmuls inhomo. stacks 1440 100.0% 0.0% 0.0% matmuls total 249334846 0.0% 100.0% 0.0% number of processed stacks 368972 0.4% 99.6% 0.0% average stack size 1.0 678.4 0.0 marketing flops 7.165779E+12 ------------------------------------------------------------------------------- # multiplications 1160 max memory usage/rank 1.395491E+09 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 2592 MPI messages size (bytes): total size 1.140326E+09 min size 0.000000E+00 max size 1.663488E+06 average size 439.940750E+03 MPI breakdown and total messages size (bytes): size <= 128 132 0 128 < size <= 8192 348 2850816 8192 < size <= 32768 0 0 32768 < size <= 131072 1536 179306496 131072 < size <= 4194304 576 958169088 4194304 < size <= 16777216 0 0 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 24 12. MP_Allreduce 2365 53. MP_Alltoall 4670 822089. MP_ISend 2604 90540. MP_IRecv 2604 90537. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 12 MP_Bcast 230 1134128. MP_Allreduce 571 1938539. MP_Sync 25 MP_Alltoall 38 9316958. MP_SendRecv 120 384007. MP_ISendRecv 45 235435. MP_Wait 191 MP_comm_split 10 MP_ISend 127 3867574. MP_IRecv 127 3866554. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.007 0.027 236.382 236.383 qs_energies 1 2.0 0.001 0.002 236.154 236.207 mp2_main 1 3.0 0.003 0.005 233.705 233.758 mp2_gpw_main 1 4.0 0.007 0.013 232.593 232.647 mp2_ri_gpw_compute_in 1 5.0 0.307 0.357 151.921 154.006 mp2_ri_gpw_compute_in_loop 1 6.0 0.004 0.005 98.417 100.496 mp2_eri_3c_integrate_gpw 272 7.0 0.175 0.185 83.307 85.108 rpa_ri_compute_en 1 5.0 0.067 0.108 80.558 83.892 rpa_num_int 1 6.0 0.006 0.020 69.339 69.353 rpa_num_int_RPA_matrix_operati 8 7.0 0.027 0.040 68.611 69.345 calc_mat_Q 8 8.0 0.000 0.000 67.308 68.179 contract_S_to_Q 8 9.0 0.016 0.031 66.092 66.973 parallel_gemm_fm 14 9.1 0.000 0.000 65.654 66.522 parallel_gemm_fm_cosma 14 10.1 65.654 66.522 65.654 66.522 integrate_v_rspace 273 8.0 0.428 0.444 61.189 62.478 grid_integrate_task_list 273 9.0 55.916 57.301 55.916 57.301 get_2c_integrals 1 6.0 0.023 0.033 48.623 53.199 fft_wrap_pw1pw2 5465 10.4 0.086 0.098 40.580 45.073 fft_wrap_pw1pw2_100 2178 11.4 4.543 5.346 36.667 40.380 compute_2c_integrals 1 7.0 0.037 0.060 32.112 32.127 compute_2c_integrals_loop_lm 1 8.0 0.004 0.005 30.299 31.664 mp2_eri_2c_integrate_gpw 1 9.0 2.101 2.200 30.294 31.659 fft3d_s 5443 12.4 20.543 22.291 20.571 22.320 cp_fm_cholesky_decompose 12 8.2 16.733 21.263 16.733 21.263 cholesky_decomp 1 7.0 0.000 0.000 15.521 20.047 calculate_wavefunction 272 8.0 5.867 6.084 15.775 17.728 mp2_eri_2c_integrate_gpw_pot_l 272 10.0 0.002 0.003 14.744 15.686 calc_potential_gpw 544 9.5 0.006 0.008 14.579 15.253 collocate_single_gaussian 272 10.0 0.056 0.073 12.773 13.877 ao_to_mo_and_store_B_mult_1 272 7.0 12.119 13.749 12.119 13.749 potential_pw2rs 545 10.0 0.149 0.175 11.264 12.301 pw_scatter_s 2720 12.7 9.092 10.003 9.092 10.003 create_integ_mat 1 6.0 0.012 0.016 8.956 8.957 array2fm 1 7.0 0.000 0.000 7.567 8.066 mp_sync 25 8.8 5.272 7.728 5.272 7.728 pw_poisson_solve 545 10.5 0.011 0.015 4.923 5.701 mp_min_d 1 6.0 2.083 5.433 2.083 5.433 pw_gather_s 2722 12.2 4.017 4.732 4.017 4.732 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (4n/4r/9t)", y=232.576150, yerr=0.000000 PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (4n/4r/9t)", y=2747.000000, yerr=0.000000 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/02/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 0.000000E+00 0.0% 0.0% 0.0% flops max/rank 0.000000E+00 0.0% 0.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 0 0.0% 0.0% 0.0% number of processed stacks 0 0.0% 0.0% 0.0% average stack size 0.0 0.0 0.0 marketing flops 0.000000E+00 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 1 12. MP_Allreduce 19 21. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 22 200775. MP_Allreduce 424 9. MP_Sync 4 MP_comm_split 1 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.037 0.068 536.399 536.453 farming_run 1 2.0 535.518 535.591 536.268 536.318 ------------------------------------------------------------------------------- @@@@@@@@@@ Run number: 2 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 32 x 32 x 32 16777216 0.0% 100.0% 0.0% flops 100 x 32 x 143 36608000 0.0% 100.0% 0.0% flops 14 x 32 x 32 36700160 0.0% 100.0% 0.0% flops 29 x 32 x 32 38010880 0.0% 100.0% 0.0% flops 128 x 32 x 143 46858240 0.0% 100.0% 0.0% flops 157 x 32 x 143 57474560 0.0% 100.0% 0.0% flops 100 x 32 x 32 58982400 0.0% 100.0% 0.0% flops 171 x 32 x 143 62599680 0.0% 100.0% 0.0% flops 186 x 32 x 143 68090880 0.0% 100.0% 0.0% flops 100 x 32 x 142 72704000 0.0% 100.0% 0.0% flops 200 x 32 x 143 73216000 0.0% 100.0% 0.0% flops 128 x 32 x 32 75497472 0.0% 100.0% 0.0% flops 100 x 32 x 157 80384000 0.0% 100.0% 0.0% flops 157 x 32 x 32 92602368 0.0% 100.0% 0.0% flops 128 x 32 x 142 93061120 0.0% 100.0% 0.0% flops 171 x 32 x 32 100859904 0.0% 100.0% 0.0% flops 128 x 32 x 157 102891520 0.0% 100.0% 0.0% flops 142 x 32 x 143 103966720 0.0% 100.0% 0.0% flops 143 x 32 x 143 104698880 0.0% 100.0% 0.0% flops 186 x 32 x 32 109707264 0.0% 100.0% 0.0% flops 157 x 32 x 142 114145280 0.0% 100.0% 0.0% flops 156 x 32 x 143 114216960 0.0% 100.0% 0.0% flops 200 x 32 x 32 117964800 0.0% 100.0% 0.0% flops 171 x 32 x 142 124323840 0.0% 100.0% 0.0% flops 157 x 32 x 157 126202880 0.0% 100.0% 0.0% flops 186 x 32 x 142 135229440 0.0% 100.0% 0.0% flops 171 x 32 x 157 137456640 0.0% 100.0% 0.0% flops 200 x 32 x 142 145408000 0.0% 100.0% 0.0% flops 32 x 32 x 143 146432000 0.0% 100.0% 0.0% flops 186 x 32 x 157 149514240 0.0% 100.0% 0.0% flops 200 x 32 x 157 160768000 0.0% 100.0% 0.0% flops 142 x 32 x 32 167510016 0.0% 100.0% 0.0% flops 143 x 32 x 32 168689664 0.0% 100.0% 0.0% flops 156 x 32 x 32 184025088 0.0% 100.0% 0.0% flops 142 x 32 x 142 206479360 0.0% 100.0% 0.0% flops 143 x 32 x 142 207933440 0.0% 100.0% 0.0% flops 156 x 32 x 142 226836480 0.0% 100.0% 0.0% flops 142 x 32 x 157 228290560 0.0% 100.0% 0.0% flops 143 x 32 x 157 229898240 0.0% 100.0% 0.0% flops 156 x 32 x 157 250798080 0.0% 100.0% 0.0% flops 32 x 32 x 142 290816000 0.0% 100.0% 0.0% flops 32 x 32 x 157 321536000 0.0% 100.0% 0.0% flops 14 x 14 x 32 626196480 0.0% 100.0% 0.0% flops 29 x 14 x 32 638582784 0.0% 100.0% 0.0% flops 14 x 29 x 32 638582784 0.0% 100.0% 0.0% flops 29 x 29 x 32 682057728 0.0% 100.0% 0.0% flops 14 x 32 x 14 896799524096 0.0% 100.0% 0.0% flops 29 x 32 x 14 928925089792 0.0% 100.0% 0.0% flops 14 x 32 x 29 928925089792 0.0% 100.0% 0.0% flops 29 x 32 x 29 962100985856 0.0% 100.0% 0.0% flops 32 x 32 x 14 1693022420992 0.0% 100.0% 0.0% flops 32 x 32 x 29 1753487507456 0.0% 100.0% 0.0% flops inhomo. stacks 1112785920 100.0% 0.0% 0.0% flops total 7.172345E+12 0.0% 100.0% 0.0% flops max/rank 150.710992E+09 0.1% 99.9% 0.0% matmuls inhomo. stacks 980 100.0% 0.0% 0.0% matmuls total 249562189 0.0% 100.0% 0.0% number of processed stacks 347432 0.3% 99.7% 0.0% average stack size 1.0 720.3 0.0 marketing flops 7.174951E+12 ------------------------------------------------------------------------------- # multiplications 1140 max memory usage/rank 1.205334E+09 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 61440 MPI messages size (bytes): total size 6.073508E+09 min size 0.000000E+00 max size 642.960000E+03 average size 98.852664E+03 MPI breakdown and total messages size (bytes): size <= 128 32004 0 128 < size <= 8192 1820 14909440 8192 < size <= 32768 0 0 32768 < size <= 131072 18640 1081442304 131072 < size <= 4194304 8976 4977156096 4194304 < size <= 16777216 0 0 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 53 12. MP_Allreduce 1182 39. MP_Alltoall 1797 713945. MP_ISend 3686 54897. MP_IRecv 3622 54246. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 12 MP_Bcast 757 478553. MP_Allreduce 2021 21391. MP_Sync 37 MP_Alltoall 77 MP_SendRecv 4192 1987179. MP_ISendRecv 1034 172713. MP_Wait 1346 MP_comm_split 7 MP_ISend 264 362227. MP_IRecv 264 362718. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.043 343.574 343.610 qs_energies 1 2.0 0.002 0.007 343.357 343.383 mp2_main 1 3.0 0.004 0.015 238.704 238.724 mp2_gpw_main 1 4.0 0.006 0.044 237.695 237.735 mp2_ri_gpw_compute_en 1 5.0 0.054 0.067 125.647 140.043 mp2_ri_gpw_compute_en_RI_loop 1 6.0 2.418 2.530 119.590 119.620 mp2_ri_gpw_compute_in 1 5.0 0.097 0.107 111.907 117.065 scf_env_do_scf 1 3.0 0.000 0.000 104.353 104.364 qs_ks_update_qs_env 5 5.0 0.000 0.000 103.463 103.474 rebuild_ks_matrix 4 6.0 0.000 0.000 103.461 103.473 qs_ks_build_kohn_sham_matrix 4 7.0 0.022 0.025 103.461 103.473 hfx_ks_matrix 4 8.0 0.001 0.001 103.012 103.025 integrate_four_center 4 9.0 0.279 0.654 103.011 103.024 mp2_ri_gpw_compute_en_expansio 172 7.0 0.714 0.810 88.759 93.026 local_gemm 172 8.0 88.044 92.223 88.044 92.223 mp2_ri_gpw_compute_in_loop 1 6.0 0.001 0.002 82.587 87.748 integrate_four_center_main 4 10.0 0.163 0.565 79.723 82.732 integrate_four_center_bin 217 11.0 79.560 82.694 79.560 82.694 init_scf_loop 1 4.0 0.000 0.000 81.484 81.493 mp2_eri_3c_integrate_gpw 91 7.0 0.144 0.180 72.134 76.543 integrate_v_rspace 95 8.0 0.309 0.476 53.849 56.932 grid_integrate_task_list 95 9.0 48.839 50.286 48.839 50.286 fft_wrap_pw1pw2 1868 10.4 0.033 0.042 35.971 42.004 fft_wrap_pw1pw2_100 730 11.4 1.785 2.178 33.282 39.059 mp2_ri_gpw_compute_en_comm 36 7.0 1.098 1.352 24.103 35.500 mp_sendrecv_dm3 3384 8.0 21.187 32.556 21.187 32.556 get_2c_integrals 1 6.0 0.017 0.075 29.101 29.221 compute_2c_integrals 1 7.0 0.016 0.057 28.253 28.266 compute_2c_integrals_loop_lm 1 8.0 0.001 0.002 24.771 28.033 mp2_eri_2c_integrate_gpw 1 9.0 1.576 1.836 24.769 28.032 fft3d_s 1823 12.4 22.186 26.187 22.204 26.205 scf_env_do_scf_inner_loop 4 4.0 0.000 0.000 22.868 22.870 mp_min_d 2 7.0 5.236 19.754 5.236 19.754 mp2_ri_get_integ_group_size 1 6.0 0.001 0.025 5.164 19.564 integrate_four_center_load 4 10.0 0.000 0.000 18.127 18.171 hfx_load_balance 1 11.0 0.001 0.001 18.127 18.171 calc_potential_gpw 182 9.5 0.002 0.003 14.321 17.786 mp2_eri_2c_integrate_gpw_pot_l 91 10.0 0.001 0.001 12.664 15.338 calculate_wavefunction 91 8.0 2.145 2.452 11.560 14.679 potential_pw2rs 186 10.0 0.043 0.046 9.952 13.893 mp_comm_split_direct 6 7.2 3.301 12.138 3.301 12.138 collocate_single_gaussian 91 10.0 0.019 0.024 9.738 11.894 mp_sum_l 425 2.2 5.543 10.981 5.543 10.981 hfx_load_balance_dist 1 12.0 0.000 0.000 5.503 10.905 ao_to_mo_and_store_B_mult_1 91 7.0 8.857 10.372 8.857 10.372 mp_sync 37 10.5 3.811 9.307 3.811 9.307 hfx_load_balance_count 1 12.0 6.300 9.059 6.300 9.059 hfx_load_balance_bin 1 12.0 6.282 9.024 6.282 9.024 pw_poisson_solve 186 10.4 0.006 0.008 5.800 7.746 pw_scatter_s 910 12.7 5.709 7.284 5.709 7.284 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (4n/12r/3t)", y=237.693830, yerr=0.000000 PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (4n/12r/3t)", y=1303.000000, yerr=0.000000 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/03/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 207618048 0.0% 100.0% 0.0% flops 22 x 32 x 32 253755392 0.0% 100.0% 0.0% flops 32 x 32 x 32 26877100032 0.0% 100.0% 0.0% flops 209 x 32 x 209 42582335488 0.0% 100.0% 0.0% flops 209 x 32 x 213 43397308416 0.0% 100.0% 0.0% flops 213 x 32 x 209 43397308416 0.0% 100.0% 0.0% flops 9 x 9 x 32 44168260608 0.0% 100.0% 0.0% flops 213 x 32 x 213 44227878912 0.0% 100.0% 0.0% flops 209 x 32 x 218 44416024576 0.0% 100.0% 0.0% flops 218 x 32 x 209 44416024576 0.0% 100.0% 0.0% flops 213 x 32 x 218 45266092032 0.0% 100.0% 0.0% flops 218 x 32 x 213 45266092032 0.0% 100.0% 0.0% flops 32 x 32 x 209 46131576832 0.0% 100.0% 0.0% flops 218 x 32 x 218 46328676352 0.0% 100.0% 0.0% flops 32 x 32 x 213 47014477824 0.0% 100.0% 0.0% flops 32 x 32 x 218 48118104064 0.0% 100.0% 0.0% flops 22 x 9 x 32 53835724800 0.0% 100.0% 0.0% flops 9 x 22 x 32 53885500416 0.0% 100.0% 0.0% flops 209 x 32 x 32 56760467456 0.0% 100.0% 0.0% flops 213 x 32 x 32 57846792192 0.0% 100.0% 0.0% flops 218 x 32 x 32 59204698112 0.0% 100.0% 0.0% flops 22 x 22 x 32 67007283200 0.0% 100.0% 0.0% flops 9 x 32 x 9 185405884416 0.0% 100.0% 0.0% flops 22 x 32 x 9 227871249408 0.0% 100.0% 0.0% flops 9 x 32 x 22 227871249408 0.0% 100.0% 0.0% flops 22 x 32 x 22 279130931200 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 1.880888E+12 0.0% 100.0% 0.0% flops max/rank 20.325101E+09 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 101210040 0.0% 100.0% 0.0% number of processed stacks 3134624 0.0% 100.0% 0.0% average stack size 0.0 32.3 0.0 marketing flops 2.107629E+12 ------------------------------------------------------------------------------- # multiplications 2286 max memory usage/rank 179.122176E+06 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 7242048 MPI messages size (bytes): total size 355.819487E+09 min size 0.000000E+00 max size 380.192000E+03 average size 49.132441E+03 MPI breakdown and total messages size (bytes): size <= 128 2986104 0 128 < size <= 8192 1493448 12234326016 8192 < size <= 32768 0 0 32768 < size <= 131072 2138400 116785152000 131072 < size <= 4194304 624096 226802306368 4194304 < size <= 16777216 0 0 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 27 12. MP_Allreduce 12193 16. MP_Alltoall 8655 34121. MP_ISend 109684 25393. MP_IRecv 109684 24883. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3683 62395. MP_Allreduce 10330 309. MP_Sync 1482 MP_Alltoall 2094 25181027. MP_SendRecv 34034 3780. MP_ISendRecv 34034 3780. MP_Wait 45572 MP_comm_split 50 MP_ISend 23112 34348. MP_IRecv 23112 34348. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.015 0.042 24.229 24.237 qs_mol_dyn_low 1 2.0 0.010 0.019 23.922 23.932 qs_forces 11 3.9 0.002 0.003 23.194 23.206 qs_energies 11 4.9 0.001 0.002 21.838 21.850 scf_env_do_scf 11 5.9 0.000 0.001 19.759 19.764 scf_env_do_scf_inner_loop 108 6.5 0.002 0.007 17.875 17.880 velocity_verlet 10 3.0 0.001 0.002 13.748 13.756 rebuild_ks_matrix 119 8.3 0.000 0.000 8.016 8.109 qs_ks_build_kohn_sham_matrix 119 9.3 0.011 0.013 8.016 8.109 qs_ks_update_qs_env 119 7.6 0.001 0.001 7.107 7.193 sum_up_and_integrate 119 10.3 0.001 0.002 6.338 6.356 integrate_v_rspace 119 11.3 0.002 0.003 6.332 6.351 qs_scf_new_mos 108 7.5 0.000 0.001 6.225 6.295 qs_scf_loop_do_ot 108 8.5 0.001 0.001 6.225 6.294 dbcsr_multiply_generic 2286 12.5 0.091 0.097 6.013 6.139 ot_scf_mini 108 9.5 0.002 0.002 5.896 5.955 qs_rho_update_rho_low 119 7.7 0.001 0.001 5.861 5.867 calculate_rho_elec 119 8.7 0.010 0.011 5.860 5.867 mp_waitall_1 294200 16.4 2.496 3.942 2.496 3.942 multiply_cannon 2286 13.5 0.172 0.180 3.318 3.910 ot_mini 108 10.5 0.001 0.001 3.209 3.275 multiply_cannon_loop 2286 14.5 0.149 0.163 2.681 3.083 density_rs2pw 119 9.7 0.004 0.005 2.888 3.021 grid_integrate_task_list 119 12.3 2.805 2.980 2.805 2.980 potential_pw2rs 119 12.3 0.004 0.004 2.762 2.778 mp_waitany 7404 13.9 2.195 2.748 2.195 2.748 grid_collocate_task_list 119 9.7 2.397 2.576 2.397 2.576 qs_ot_get_derivative 108 11.5 0.001 0.001 2.491 2.551 transfer_pw2rs 487 13.2 0.005 0.005 2.214 2.229 transfer_rs2pw 487 10.6 0.005 0.006 1.984 2.131 multiply_cannon_metrocomm3 27432 15.5 0.073 0.077 0.717 2.051 fft_wrap_pw1pw2 1201 11.6 0.008 0.010 1.979 2.046 make_m2s 4572 13.5 0.065 0.070 1.787 1.962 init_scf_loop 11 6.9 0.000 0.000 1.861 1.863 qs_ot_get_p 119 10.4 0.001 0.001 1.646 1.725 make_images 4572 14.5 0.134 0.139 1.506 1.679 mp_alltoall_d11v 2130 13.8 1.520 1.641 1.520 1.641 multiply_cannon_multrec 27432 15.5 0.900 1.591 0.907 1.599 fft_wrap_pw1pw2_140 487 12.2 0.037 0.045 1.445 1.583 init_scf_run 11 5.9 0.000 0.001 1.469 1.469 scf_env_initial_rho_setup 11 6.9 0.000 0.001 1.469 1.469 fft3d_pb 487 13.2 0.362 0.503 1.317 1.441 transfer_rs2pw_50 119 11.7 0.103 0.117 1.215 1.252 transfer_pw2rs_50 119 14.3 0.071 0.083 1.162 1.245 wfi_extrapolate 11 7.9 0.001 0.001 1.104 1.104 mp_alltoall_z22v 1688 15.5 0.922 1.029 0.922 1.029 multiply_cannon_metrocomm1 27432 15.5 0.077 0.082 0.567 1.023 prepare_preconditioner 11 7.9 0.000 0.000 0.953 0.961 make_preconditioner 11 8.9 0.000 0.000 0.953 0.961 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 0.953 0.961 qs_ot_p2m_diag 50 11.0 0.003 0.006 0.922 0.929 make_images_data 4572 15.5 0.051 0.059 0.681 0.928 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 0.872 0.904 make_full_inverse_cholesky 11 9.9 0.000 0.000 0.876 0.896 qs_ot_get_derivative_taylor 59 13.0 0.001 0.001 0.840 0.870 hybrid_alltoall_any 4725 16.4 0.049 0.109 0.555 0.852 mp_sum_l 11298 13.2 0.495 0.850 0.495 0.850 rs_gather_matrices 119 12.3 0.041 0.055 0.712 0.831 cp_dbcsr_syevd 50 12.0 0.005 0.006 0.788 0.790 mp_allgather_i34 2286 14.5 0.325 0.750 0.325 0.750 make_images_sizes 4572 15.5 0.005 0.005 0.489 0.721 transfer_pw2rs_140 130 13.9 0.230 0.264 0.660 0.719 mp_alltoall_i44 4572 16.5 0.484 0.716 0.484 0.716 ot_diis_step 108 11.5 0.006 0.006 0.702 0.702 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 0.590 0.671 apply_single 119 13.6 0.000 0.000 0.589 0.670 transfer_rs2pw_140 130 11.5 0.158 0.197 0.513 0.639 fft3d_ps 714 14.0 0.045 0.085 0.506 0.593 rs_scatter_matrices 130 9.7 0.028 0.036 0.583 0.590 mp_sum_d 4139 12.0 0.355 0.563 0.355 0.563 cp_fm_diag_elpa 50 13.0 0.000 0.000 0.541 0.541 cp_fm_redistribute_end 50 14.0 0.261 0.507 0.266 0.511 dbcsr_complete_redistribute 329 12.2 0.043 0.061 0.465 0.496 cp_fm_diag_elpa_base 50 14.0 0.240 0.482 0.243 0.488 mp_sum_dm 438 4.9 0.476 0.487 0.476 0.487 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="100", plot="h2o_64_md", label="(4n/36r/1t)", y=24.237000, yerr=0.000000 PlotPoint: name="101", plot="h2o_64_md_mem", label="(4n/36r/1t)", y=170.454545, yerr=0.782030 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/04/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 207618048 0.0% 100.0% 0.0% flops 22 x 32 x 32 253755392 0.0% 100.0% 0.0% flops 142 x 32 x 213 3685656576 0.0% 100.0% 0.0% flops 142 x 32 x 218 3772174336 0.0% 100.0% 0.0% flops 182 x 32 x 213 4723869696 0.0% 100.0% 0.0% flops 182 x 32 x 218 4834758656 0.0% 100.0% 0.0% flops 187 x 32 x 213 4853646336 0.0% 100.0% 0.0% flops 191 x 32 x 213 4957467648 0.0% 100.0% 0.0% flops 187 x 32 x 218 4967581696 0.0% 100.0% 0.0% flops 191 x 32 x 218 5073840128 0.0% 100.0% 0.0% flops 196 x 32 x 213 5087244288 0.0% 100.0% 0.0% flops 196 x 32 x 218 5206663168 0.0% 100.0% 0.0% flops 209 x 32 x 213 5424663552 0.0% 100.0% 0.0% flops 209 x 32 x 218 5552003072 0.0% 100.0% 0.0% flops 218 x 32 x 213 5658261504 0.0% 100.0% 0.0% flops 218 x 32 x 218 5791084544 0.0% 100.0% 0.0% flops 240 x 32 x 213 6229278720 0.0% 100.0% 0.0% flops 240 x 32 x 218 6375505920 0.0% 100.0% 0.0% flops 249 x 32 x 213 6462876672 0.0% 100.0% 0.0% flops 249 x 32 x 218 6614587392 0.0% 100.0% 0.0% flops 284 x 32 x 213 7371313152 0.0% 100.0% 0.0% flops 284 x 32 x 218 7544348672 0.0% 100.0% 0.0% flops 142 x 32 x 32 9641132032 0.0% 100.0% 0.0% flops 142 x 32 x 209 10849327104 0.0% 100.0% 0.0% flops 231 x 32 x 213 11991361536 0.0% 100.0% 0.0% flops 231 x 32 x 218 12272848896 0.0% 100.0% 0.0% flops 182 x 32 x 32 12356943872 0.0% 100.0% 0.0% flops 187 x 32 x 32 12696420352 0.0% 100.0% 0.0% flops 191 x 32 x 32 12968001536 0.0% 100.0% 0.0% flops 196 x 32 x 32 13307478016 0.0% 100.0% 0.0% flops 182 x 32 x 209 13905475584 0.0% 100.0% 0.0% flops 209 x 32 x 32 14190116864 0.0% 100.0% 0.0% flops 187 x 32 x 209 14287494144 0.0% 100.0% 0.0% flops 191 x 32 x 209 14593108992 0.0% 100.0% 0.0% flops 218 x 32 x 32 14801174528 0.0% 100.0% 0.0% flops 196 x 32 x 209 14975127552 0.0% 100.0% 0.0% flops 209 x 32 x 209 15968375808 0.0% 100.0% 0.0% flops 240 x 32 x 32 16294871040 0.0% 100.0% 0.0% flops 218 x 32 x 209 16656009216 0.0% 100.0% 0.0% flops 249 x 32 x 32 16905928704 0.0% 100.0% 0.0% flops 240 x 32 x 209 18336890880 0.0% 100.0% 0.0% flops 249 x 32 x 209 19024524288 0.0% 100.0% 0.0% flops 284 x 32 x 32 19282264064 0.0% 100.0% 0.0% flops 284 x 32 x 209 21698654208 0.0% 100.0% 0.0% flops 32 x 32 x 213 23507238912 0.0% 100.0% 0.0% flops 32 x 32 x 218 24059052032 0.0% 100.0% 0.0% flops 32 x 32 x 32 26877100032 0.0% 100.0% 0.0% flops 231 x 32 x 32 31367626752 0.0% 100.0% 0.0% flops 231 x 32 x 209 35298514944 0.0% 100.0% 0.0% flops 9 x 9 x 32 44168260608 0.0% 100.0% 0.0% flops 22 x 9 x 32 53835724800 0.0% 100.0% 0.0% flops 9 x 22 x 32 53885500416 0.0% 100.0% 0.0% flops 22 x 22 x 32 67007283200 0.0% 100.0% 0.0% flops 32 x 32 x 209 69197365248 0.0% 100.0% 0.0% flops 9 x 32 x 9 185405884416 0.0% 100.0% 0.0% flops 22 x 32 x 9 227871249408 0.0% 100.0% 0.0% flops 9 x 32 x 22 227871249408 0.0% 100.0% 0.0% flops 22 x 32 x 22 279130931200 0.0% 100.0% 0.0% flops inhomo. stacks 103113707520 100.0% 0.0% 0.0% flops total 1.890248E+12 5.5% 94.5% 0.0% flops max/rank 40.294274E+09 6.1% 93.9% 0.0% matmuls inhomo. stacks 76736 100.0% 0.0% 0.0% matmuls total 101210040 0.1% 99.9% 0.0% number of processed stacks 3136704 2.4% 97.6% 0.0% average stack size 1.0 33.1 0.0 marketing flops 2.107629E+12 ------------------------------------------------------------------------------- # multiplications 2286 max memory usage/rank 203.223040E+06 # max total images/rank 2 # max 3D layers 1 # MPI messages exchanged 3456432 MPI messages size (bytes): total size 321.940816E+09 min size 0.000000E+00 max size 765.456000E+03 average size 93.142531E+03 MPI breakdown and total messages size (bytes): size <= 128 1163952 0 128 < size <= 8192 704472 5771034624 8192 < size <= 32768 140976 2309750784 32768 < size <= 131072 1134984 87058022400 131072 < size <= 4194304 312048 226802306368 4194304 < size <= 16777216 0 0 16777216 < size 0 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3683 62487. MP_Allreduce 10328 308. MP_Sync 54 MP_Alltoall 2082 803417. MP_SendRecv 16898 6600. MP_ISendRecv 16898 6600. MP_Wait 35258 MP_comm_split 50 MP_ISend 15892 63460. MP_IRecv 15892 63460. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.041 32.858 32.859 qs_mol_dyn_low 1 2.0 0.007 0.016 32.587 32.597 qs_forces 11 3.9 0.003 0.003 32.489 32.508 qs_energies 11 4.9 0.001 0.001 30.492 30.513 scf_env_do_scf 11 5.9 0.001 0.004 27.571 27.572 scf_env_do_scf_inner_loop 108 6.5 0.003 0.035 24.734 24.742 velocity_verlet 10 3.0 0.002 0.005 19.036 19.038 rebuild_ks_matrix 119 8.3 0.001 0.001 10.406 10.493 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.017 10.406 10.493 qs_ks_update_qs_env 119 7.6 0.001 0.001 9.225 9.305 qs_scf_new_mos 108 7.5 0.001 0.001 9.166 9.279 qs_scf_loop_do_ot 108 8.5 0.001 0.001 9.165 9.278 dbcsr_multiply_generic 2286 12.5 0.129 0.192 8.775 8.862 ot_scf_mini 108 9.5 0.003 0.003 8.668 8.749 sum_up_and_integrate 119 10.3 0.001 0.002 8.102 8.111 qs_rho_update_rho_low 119 7.7 0.001 0.001 8.098 8.107 calculate_rho_elec 119 8.7 0.018 0.022 8.098 8.107 integrate_v_rspace 119 11.3 0.003 0.004 8.088 8.097 multiply_cannon 2286 13.5 0.229 0.266 4.807 5.269 grid_collocate_task_list 119 9.7 4.656 4.870 4.656 4.870 ot_mini 108 10.5 0.001 0.001 4.767 4.862 grid_integrate_task_list 119 12.3 4.603 4.785 4.603 4.785 mp_waitall_1 220534 16.5 2.534 4.391 2.534 4.391 multiply_cannon_loop 2286 14.5 0.215 0.309 3.908 4.226 qs_ot_get_derivative 108 11.5 0.001 0.001 3.644 3.729 density_rs2pw 119 9.7 0.006 0.009 2.851 3.050 multiply_cannon_metrocomm3 27432 15.5 0.091 0.191 1.246 2.901 init_scf_loop 11 6.9 0.000 0.000 2.810 2.818 make_m2s 4572 13.5 0.088 0.115 2.629 2.784 multiply_cannon_multrec 27432 15.5 1.728 2.754 1.741 2.766 potential_pw2rs 119 12.3 0.008 0.011 2.679 2.693 qs_ot_get_p 119 10.4 0.001 0.001 2.363 2.511 fft_wrap_pw1pw2 1201 11.6 0.016 0.018 2.316 2.354 make_images 4572 14.5 0.221 0.286 2.154 2.295 mp_waitany 15892 13.8 1.840 2.268 1.840 2.268 transfer_pw2rs 487 13.2 0.006 0.007 2.037 2.054 init_scf_run 11 5.9 0.000 0.008 1.991 1.992 scf_env_initial_rho_setup 11 6.9 0.000 0.005 1.991 1.991 transfer_rs2pw 487 10.6 0.007 0.009 1.743 1.966 fft3d_ps 1201 13.6 0.624 0.709 1.912 1.949 mp_alltoall_d11v 2130 13.8 1.727 1.924 1.727 1.924 fft_wrap_pw1pw2_140 487 12.2 0.093 0.106 1.873 1.918 prepare_preconditioner 11 7.9 0.000 0.000 1.558 1.574 make_preconditioner 11 8.9 0.000 0.000 1.558 1.574 wfi_extrapolate 11 7.9 0.001 0.001 1.544 1.544 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.395 1.435 qs_ot_get_derivative_taylor 59 13.0 0.002 0.003 1.255 1.296 qs_ot_get_derivative_diag 49 12.0 0.001 0.002 1.248 1.294 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.270 1.278 qs_ot_p2m_diag 50 11.0 0.006 0.014 1.208 1.223 mp_alltoall_z22v 1201 15.6 1.040 1.184 1.040 1.184 mp_sum_l 11298 13.2 0.710 1.128 0.710 1.128 ot_diis_step 108 11.5 0.012 0.014 1.107 1.108 make_images_data 4572 15.5 0.064 0.092 0.874 1.080 transfer_pw2rs_50 119 14.3 0.097 0.111 0.869 1.042 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 0.986 1.039 apply_single 119 13.6 0.000 0.001 0.986 1.038 hybrid_alltoall_any 4725 16.4 0.064 0.124 0.769 1.032 transfer_rs2pw_140 130 11.5 0.207 0.239 0.794 1.017 cp_dbcsr_syevd 50 12.0 0.004 0.004 0.995 0.996 transfer_pw2rs_140 130 13.9 0.293 0.362 0.896 0.975 make_images_sizes 4572 15.5 0.006 0.014 0.672 0.965 rs_gather_matrices 119 12.3 0.063 0.071 0.751 0.961 mp_alltoall_i44 4572 16.5 0.666 0.960 0.666 0.960 mp_sum_d 4139 12.0 0.434 0.808 0.434 0.808 dbcsr_complete_redistribute 329 12.2 0.075 0.101 0.731 0.804 mp_allgather_i34 2286 14.5 0.417 0.770 0.417 0.770 yz_to_x 368 14.5 0.054 0.076 0.663 0.759 transfer_rs2pw_50 119 11.7 0.128 0.139 0.724 0.750 multiply_cannon_metrocomm4 25146 15.5 0.088 0.185 0.321 0.739 qs_energies_init_hamiltonians 11 5.9 0.000 0.001 0.706 0.725 build_core_hamiltonian_matrix_ 11 4.9 0.000 0.001 0.603 0.698 cp_fm_diag_elpa 50 13.0 0.000 0.000 0.692 0.693 cp_fm_redistribute_end 50 14.0 0.339 0.662 0.344 0.666 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="102", plot="h2o_64_md", label="(4n/18r/2t)", y=32.859000, yerr=0.000000 PlotPoint: name="103", plot="h2o_64_md_mem", label="(4n/18r/2t)", y=193.727273, yerr=0.616575 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/05/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 207618048 0.0% 100.0% 0.0% flops 22 x 32 x 32 253755392 0.0% 100.0% 0.0% flops 142 x 32 x 200 3460710400 0.0% 100.0% 0.0% flops 164 x 32 x 200 3996876800 0.0% 100.0% 0.0% flops 209 x 32 x 200 5093580800 0.0% 100.0% 0.0% flops 213 x 32 x 200 5191065600 0.0% 100.0% 0.0% flops 64 x 32 x 200 5518131200 0.0% 100.0% 0.0% flops 231 x 32 x 200 5629747200 0.0% 100.0% 0.0% flops 262 x 32 x 200 6385254400 0.0% 100.0% 0.0% flops 64 x 32 x 32 6719275008 0.0% 100.0% 0.0% flops 293 x 32 x 200 7140761600 0.0% 100.0% 0.0% flops 142 x 32 x 209 7232884736 0.0% 100.0% 0.0% flops 142 x 32 x 222 7682777088 0.0% 100.0% 0.0% flops 164 x 32 x 209 8353472512 0.0% 100.0% 0.0% flops 164 x 32 x 222 8873066496 0.0% 100.0% 0.0% flops 196 x 32 x 200 9553510400 0.0% 100.0% 0.0% flops 142 x 32 x 32 9641132032 0.0% 100.0% 0.0% flops 209 x 32 x 209 10645583872 0.0% 100.0% 0.0% flops 213 x 32 x 209 10849327104 0.0% 100.0% 0.0% flops 164 x 32 x 32 11134828544 0.0% 100.0% 0.0% flops 209 x 32 x 222 11307749376 0.0% 100.0% 0.0% flops 213 x 32 x 222 11524165632 0.0% 100.0% 0.0% flops 64 x 32 x 209 11532894208 0.0% 100.0% 0.0% flops 231 x 32 x 209 11766171648 0.0% 100.0% 0.0% flops 64 x 32 x 222 12250251264 0.0% 100.0% 0.0% flops 231 x 32 x 222 12498038784 0.0% 100.0% 0.0% flops 262 x 32 x 209 13345181696 0.0% 100.0% 0.0% flops 262 x 32 x 222 14175264768 0.0% 100.0% 0.0% flops 209 x 32 x 32 14190116864 0.0% 100.0% 0.0% flops 213 x 32 x 32 14461698048 0.0% 100.0% 0.0% flops 293 x 32 x 209 14924191744 0.0% 100.0% 0.0% flops 231 x 32 x 32 15683813376 0.0% 100.0% 0.0% flops 293 x 32 x 222 15852490752 0.0% 100.0% 0.0% flops 218 x 32 x 200 15938764800 0.0% 100.0% 0.0% flops 32 x 32 x 200 16554393600 0.0% 100.0% 0.0% flops 262 x 32 x 32 17788567552 0.0% 100.0% 0.0% flops 293 x 32 x 32 19893321728 0.0% 100.0% 0.0% flops 196 x 32 x 209 19966836736 0.0% 100.0% 0.0% flops 32 x 32 x 32 20157825024 0.0% 100.0% 0.0% flops 196 x 32 x 222 21208793088 0.0% 100.0% 0.0% flops 196 x 32 x 32 26614956032 0.0% 100.0% 0.0% flops 218 x 32 x 209 33312018432 0.0% 100.0% 0.0% flops 32 x 32 x 209 34598682624 0.0% 100.0% 0.0% flops 218 x 32 x 222 35384057856 0.0% 100.0% 0.0% flops 32 x 32 x 222 36750753792 0.0% 100.0% 0.0% flops 9 x 9 x 32 44168260608 0.0% 100.0% 0.0% flops 218 x 32 x 32 44403523584 0.0% 100.0% 0.0% flops 22 x 9 x 32 53835724800 0.0% 100.0% 0.0% flops 9 x 22 x 32 53885500416 0.0% 100.0% 0.0% flops 22 x 22 x 32 67007283200 0.0% 100.0% 0.0% flops 9 x 32 x 9 185405884416 0.0% 100.0% 0.0% flops 22 x 32 x 9 227871249408 0.0% 100.0% 0.0% flops 9 x 32 x 22 227871249408 0.0% 100.0% 0.0% flops 22 x 32 x 22 279130931200 0.0% 100.0% 0.0% flops inhomo. stacks 105981222912 100.0% 0.0% 0.0% flops total 1.894805E+12 5.6% 94.4% 0.0% flops max/rank 58.021006E+09 6.4% 93.6% 0.0% matmuls inhomo. stacks 70000 100.0% 0.0% 0.0% matmuls total 101118360 0.1% 99.9% 0.0% number of processed stacks 3045024 2.3% 97.7% 0.0% average stack size 1.0 34.0 0.0 marketing flops 2.107592E+12 ------------------------------------------------------------------------------- # multiplications 2286 max memory usage/rank 220.721152E+06 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 2194560 MPI messages size (bytes): total size 310.646604E+09 min size 0.000000E+00 max size 1.145520E+06 average size 141.553031E+03 MPI breakdown and total messages size (bytes): size <= 128 724648 0 128 < size <= 8192 253512 2076770304 8192 < size <= 32768 281952 4619501568 32768 < size <= 131072 494448 39143342080 131072 < size <= 4194304 440000 264807943488 4194304 < size <= 16777216 0 0 16777216 < size 0 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3672 62658. MP_Allreduce 10306 303. MP_Sync 54 MP_Alltoall 2060 1624918. MP_SendRecv 16779 37093. MP_ISendRecv 16779 37093. MP_Wait 23539 MP_comm_split 50 MP_ISend 5720 128509. MP_IRecv 5720 128509. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.037 31.845 31.847 qs_mol_dyn_low 1 2.0 0.012 0.026 31.549 31.560 qs_forces 11 3.9 0.004 0.008 31.414 31.442 qs_energies 11 4.9 0.001 0.001 29.736 29.767 scf_env_do_scf 11 5.9 0.001 0.001 26.797 26.799 scf_env_do_scf_inner_loop 108 6.5 0.003 0.008 23.858 23.860 velocity_verlet 10 3.0 0.001 0.002 18.083 18.086 qs_scf_new_mos 108 7.5 0.001 0.001 10.662 10.742 qs_scf_loop_do_ot 108 8.5 0.001 0.001 10.661 10.741 dbcsr_multiply_generic 2286 12.5 0.132 0.190 10.155 10.428 ot_scf_mini 108 9.5 0.003 0.003 10.135 10.219 rebuild_ks_matrix 119 8.3 0.001 0.001 8.783 8.922 qs_ks_build_kohn_sham_matrix 119 9.3 0.013 0.016 8.783 8.921 qs_ks_update_qs_env 119 7.6 0.001 0.002 7.802 7.927 qs_rho_update_rho_low 119 7.7 0.001 0.001 6.872 6.881 calculate_rho_elec 119 8.7 0.026 0.031 6.871 6.880 sum_up_and_integrate 119 10.3 0.001 0.002 6.389 6.414 integrate_v_rspace 119 11.3 0.003 0.003 6.378 6.403 multiply_cannon 2286 13.5 0.225 0.265 5.387 6.130 ot_mini 108 10.5 0.001 0.001 5.658 5.765 mp_waitall_1 200699 16.5 2.706 5.131 2.706 5.131 multiply_cannon_loop 2286 14.5 0.227 0.344 4.467 5.100 grid_collocate_task_list 119 9.7 4.201 4.663 4.201 4.663 qs_ot_get_derivative 108 11.5 0.001 0.002 4.224 4.313 grid_integrate_task_list 119 12.3 3.907 4.120 3.907 4.120 multiply_cannon_metrocomm3 27432 15.5 0.093 0.201 1.634 3.984 multiply_cannon_multrec 27432 15.5 1.846 3.611 1.859 3.624 make_m2s 4572 13.5 0.089 0.124 3.024 3.178 density_rs2pw 119 9.7 0.005 0.008 2.375 3.160 init_scf_loop 11 6.9 0.000 0.000 2.915 2.917 qs_ot_get_p 119 10.4 0.001 0.001 2.716 2.828 make_images 4572 14.5 0.232 0.290 2.444 2.672 transfer_rs2pw 487 10.6 0.005 0.007 1.480 2.360 fft_wrap_pw1pw2 1201 11.6 0.012 0.014 2.142 2.199 potential_pw2rs 119 12.3 0.007 0.011 1.969 1.994 fft3d_ps 1201 13.6 0.557 0.660 1.849 1.983 init_scf_run 11 5.9 0.000 0.001 1.917 1.918 scf_env_initial_rho_setup 11 6.9 0.000 0.001 1.917 1.918 mp_waitany 5720 13.7 0.977 1.859 0.977 1.859 prepare_preconditioner 11 7.9 0.000 0.000 1.829 1.838 make_preconditioner 11 8.9 0.000 0.000 1.829 1.838 fft_wrap_pw1pw2_140 487 12.2 0.061 0.075 1.759 1.818 transfer_rs2pw_140 130 11.5 0.151 0.176 0.919 1.796 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.684 1.724 mp_sum_l 11298 13.2 1.089 1.647 1.089 1.647 qs_ot_get_derivative_diag 49 12.0 0.001 0.002 1.496 1.541 qs_ot_get_derivative_taylor 59 13.0 0.002 0.003 1.433 1.476 wfi_extrapolate 11 7.9 0.001 0.001 1.438 1.438 ot_diis_step 108 11.5 0.014 0.016 1.391 1.393 transfer_pw2rs 487 13.2 0.004 0.005 1.370 1.381 qs_ot_p2m_diag 50 11.0 0.008 0.014 1.354 1.375 make_images_sizes 4572 15.5 0.006 0.014 0.938 1.345 mp_alltoall_z22v 1201 15.6 1.073 1.342 1.073 1.342 mp_alltoall_d11v 2130 13.8 1.047 1.340 1.047 1.340 mp_alltoall_i44 4572 16.5 0.932 1.339 0.932 1.339 apply_preconditioner_dbcsr 119 12.6 0.000 0.001 1.149 1.294 apply_single 119 13.6 0.000 0.001 1.149 1.293 cp_dbcsr_syevd 50 12.0 0.004 0.004 1.117 1.119 make_images_data 4572 15.5 0.063 0.109 0.859 1.069 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.052 1.066 mp_sum_d 4139 12.0 0.634 1.037 0.634 1.037 multiply_cannon_metrocomm4 25146 15.5 0.088 0.195 0.466 0.960 hybrid_alltoall_any 4725 16.4 0.065 0.124 0.730 0.950 cp_fm_diag_elpa 50 13.0 0.000 0.000 0.842 0.842 qs_energies_init_hamiltonians 11 5.9 0.021 0.043 0.809 0.837 mp_irecv_dv 59094 16.3 0.341 0.831 0.341 0.831 cp_fm_redistribute_end 50 14.0 0.413 0.813 0.417 0.816 cp_fm_diag_elpa_base 50 14.0 0.353 0.742 0.395 0.795 dbcsr_complete_redistribute 329 12.2 0.115 0.165 0.712 0.780 cp_fm_cholesky_invert 11 10.9 0.760 0.771 0.760 0.771 rs_gather_matrices 119 12.3 0.037 0.044 0.476 0.771 yz_to_x 368 14.5 0.039 0.048 0.611 0.754 qs_ot_get_orbitals 108 10.5 0.001 0.001 0.715 0.736 mp_allgather_i34 2286 14.5 0.410 0.709 0.410 0.709 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.675 0.681 transfer_pw2rs_140 130 13.9 0.147 0.169 0.609 0.674 transfer_pw2rs_50 119 14.3 0.352 0.396 0.588 0.666 dbcsr_make_images_dense 3978 14.8 0.057 0.088 0.392 0.648 x_to_yz 357 14.4 0.048 0.061 0.530 0.640 calculate_dm_sparse 119 9.5 0.000 0.001 0.603 0.639 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="104", plot="h2o_64_md", label="(4n/12r/3t)", y=31.847000, yerr=0.000000 PlotPoint: name="105", plot="h2o_64_md_mem", label="(4n/12r/3t)", y=210.636364, yerr=0.481046 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/06/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 207618048 0.0% 100.0% 0.0% flops 22 x 32 x 32 253755392 0.0% 100.0% 0.0% flops 80 x 32 x 32 1357905920 0.0% 100.0% 0.0% flops 80 x 32 x 64 1357905920 0.0% 100.0% 0.0% flops 80 x 64 x 32 1357905920 0.0% 100.0% 0.0% flops 80 x 64 x 64 1357905920 0.0% 100.0% 0.0% flops 89 x 32 x 32 1510670336 0.0% 100.0% 0.0% flops 89 x 32 x 64 1510670336 0.0% 100.0% 0.0% flops 89 x 64 x 32 1510670336 0.0% 100.0% 0.0% flops 89 x 64 x 64 1510670336 0.0% 100.0% 0.0% flops 64 x 64 x 64 1679818752 0.0% 100.0% 0.0% flops 64 x 64 x 32 1679818752 0.0% 100.0% 0.0% flops 64 x 32 x 32 1679818752 0.0% 100.0% 0.0% flops 64 x 32 x 64 1679818752 0.0% 100.0% 0.0% flops 80 x 32 x 422 2056929280 0.0% 100.0% 0.0% flops 80 x 64 x 422 2056929280 0.0% 100.0% 0.0% flops 80 x 32 x 427 2081300480 0.0% 100.0% 0.0% flops 80 x 64 x 427 2081300480 0.0% 100.0% 0.0% flops 80 x 32 x 431 2100797440 0.0% 100.0% 0.0% flops 80 x 64 x 431 2100797440 0.0% 100.0% 0.0% flops 89 x 32 x 422 2288333824 0.0% 100.0% 0.0% flops 89 x 64 x 422 2288333824 0.0% 100.0% 0.0% flops 89 x 32 x 427 2315446784 0.0% 100.0% 0.0% flops 89 x 64 x 427 2315446784 0.0% 100.0% 0.0% flops 89 x 32 x 431 2337137152 0.0% 100.0% 0.0% flops 89 x 64 x 431 2337137152 0.0% 100.0% 0.0% flops 71 x 64 x 64 3615424512 0.0% 100.0% 0.0% flops 71 x 64 x 32 3615424512 0.0% 100.0% 0.0% flops 71 x 32 x 32 3615424512 0.0% 100.0% 0.0% flops 71 x 32 x 64 3615424512 0.0% 100.0% 0.0% flops 32 x 32 x 32 5039456256 0.0% 100.0% 0.0% flops 32 x 32 x 64 5039456256 0.0% 100.0% 0.0% flops 32 x 64 x 64 5039456256 0.0% 100.0% 0.0% flops 32 x 64 x 32 5039456256 0.0% 100.0% 0.0% flops 71 x 64 x 422 5476574208 0.0% 100.0% 0.0% flops 71 x 32 x 422 5476574208 0.0% 100.0% 0.0% flops 71 x 64 x 427 5541462528 0.0% 100.0% 0.0% flops 71 x 32 x 427 5541462528 0.0% 100.0% 0.0% flops 71 x 64 x 431 5593373184 0.0% 100.0% 0.0% flops 71 x 32 x 431 5593373184 0.0% 100.0% 0.0% flops 64 x 64 x 422 5821628416 0.0% 100.0% 0.0% flops 64 x 32 x 422 5821628416 0.0% 100.0% 0.0% flops 64 x 64 x 427 5890605056 0.0% 100.0% 0.0% flops 64 x 32 x 427 5890605056 0.0% 100.0% 0.0% flops 64 x 64 x 431 5945786368 0.0% 100.0% 0.0% flops 64 x 32 x 431 5945786368 0.0% 100.0% 0.0% flops 111 x 64 x 64 9420472320 0.0% 100.0% 0.0% flops 111 x 64 x 32 9420472320 0.0% 100.0% 0.0% flops 111 x 32 x 32 9420472320 0.0% 100.0% 0.0% flops 111 x 32 x 64 9420472320 0.0% 100.0% 0.0% flops 98 x 64 x 64 9980608512 0.0% 100.0% 0.0% flops 98 x 64 x 32 9980608512 0.0% 100.0% 0.0% flops 98 x 32 x 32 9980608512 0.0% 100.0% 0.0% flops 98 x 32 x 64 9980608512 0.0% 100.0% 0.0% flops 120 x 32 x 32 10184294400 0.0% 100.0% 0.0% flops 120 x 32 x 64 10184294400 0.0% 100.0% 0.0% flops 120 x 64 x 64 10184294400 0.0% 100.0% 0.0% flops 120 x 64 x 32 10184294400 0.0% 100.0% 0.0% flops 111 x 64 x 422 14269946880 0.0% 100.0% 0.0% flops 111 x 32 x 422 14269946880 0.0% 100.0% 0.0% flops 111 x 64 x 427 14439022080 0.0% 100.0% 0.0% flops 111 x 32 x 427 14439022080 0.0% 100.0% 0.0% flops 111 x 64 x 431 14574282240 0.0% 100.0% 0.0% flops 111 x 32 x 431 14574282240 0.0% 100.0% 0.0% flops 98 x 64 x 422 15118430208 0.0% 100.0% 0.0% flops 98 x 32 x 422 15118430208 0.0% 100.0% 0.0% flops 98 x 64 x 427 15297558528 0.0% 100.0% 0.0% flops 98 x 32 x 427 15297558528 0.0% 100.0% 0.0% flops 120 x 32 x 422 15426969600 0.0% 100.0% 0.0% flops 120 x 64 x 422 15426969600 0.0% 100.0% 0.0% flops 98 x 64 x 431 15440861184 0.0% 100.0% 0.0% flops 98 x 32 x 431 15440861184 0.0% 100.0% 0.0% flops 120 x 32 x 427 15609753600 0.0% 100.0% 0.0% flops 120 x 64 x 427 15609753600 0.0% 100.0% 0.0% flops 120 x 32 x 431 15755980800 0.0% 100.0% 0.0% flops 120 x 64 x 431 15755980800 0.0% 100.0% 0.0% flops 32 x 32 x 422 17464885248 0.0% 100.0% 0.0% flops 32 x 64 x 422 17464885248 0.0% 100.0% 0.0% flops 32 x 32 x 427 17671815168 0.0% 100.0% 0.0% flops 32 x 64 x 427 17671815168 0.0% 100.0% 0.0% flops 32 x 32 x 431 17837359104 0.0% 100.0% 0.0% flops 32 x 64 x 431 17837359104 0.0% 100.0% 0.0% flops 9 x 9 x 64 22084130304 0.0% 100.0% 0.0% flops 9 x 9 x 32 22084130304 0.0% 100.0% 0.0% flops 22 x 9 x 64 26917862400 0.0% 100.0% 0.0% flops 22 x 9 x 32 26917862400 0.0% 100.0% 0.0% flops 9 x 22 x 64 26942750208 0.0% 100.0% 0.0% flops 9 x 22 x 32 26942750208 0.0% 100.0% 0.0% flops 22 x 22 x 64 33503641600 0.0% 100.0% 0.0% flops 22 x 22 x 32 33503641600 0.0% 100.0% 0.0% flops 9 x 32 x 9 185405884416 0.0% 100.0% 0.0% flops 22 x 32 x 9 227871249408 0.0% 100.0% 0.0% flops 9 x 32 x 22 227871249408 0.0% 100.0% 0.0% flops 22 x 32 x 22 279130931200 0.0% 100.0% 0.0% flops inhomo. stacks 112840197120 100.0% 0.0% 0.0% flops total 1.896345E+12 6.0% 94.0% 0.0% flops max/rank 83.755100E+09 11.7% 88.3% 0.0% matmuls inhomo. stacks 68796 100.0% 0.0% 0.0% matmuls total 96003990 0.1% 99.9% 0.0% number of processed stacks 2257260 3.0% 97.0% 0.0% average stack size 1.0 43.8 0.0 marketing flops 2.107629E+12 ------------------------------------------------------------------------------- # multiplications 2286 max memory usage/rank 234.143744E+06 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 822960 MPI messages size (bytes): total size 161.737343E+09 min size 0.000000E+00 max size 1.486088E+06 average size 196.531234E+03 MPI breakdown and total messages size (bytes): size <= 128 5610 0 128 < size <= 8192 169820 1391165440 8192 < size <= 32768 212110 4169891840 32768 < size <= 131072 243000 26542080000 131072 < size <= 4194304 192420 129634037440 4194304 < size <= 16777216 0 0 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 95 12. MP_Allreduce 12329 16. MP_Alltoall 8655 36603. MP_ISend 54820 93714. MP_IRecv 54820 91356. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3683 62471. MP_Allreduce 10327 343. MP_Sync 54 MP_Alltoall 1843 594863. MP_SendRecv 8330 18700. MP_ISendRecv 8330 18700. MP_Wait 31172 MP_comm_split 50 MP_ISend 20872 59666. MP_IRecv 20872 59666. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.014 0.037 31.096 31.096 qs_mol_dyn_low 1 2.0 0.004 0.004 30.853 30.863 qs_forces 11 3.9 0.002 0.003 30.728 30.733 qs_energies 11 4.9 0.001 0.002 28.778 28.783 scf_env_do_scf 11 5.9 0.001 0.003 26.051 26.052 scf_env_do_scf_inner_loop 108 6.5 0.003 0.028 23.417 23.418 velocity_verlet 10 3.0 0.001 0.001 17.874 17.876 rebuild_ks_matrix 119 8.3 0.001 0.001 9.328 9.506 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.017 9.327 9.506 qs_scf_new_mos 108 7.5 0.001 0.001 9.239 9.353 qs_scf_loop_do_ot 108 8.5 0.001 0.001 9.238 9.353 ot_scf_mini 108 9.5 0.003 0.003 8.776 8.890 dbcsr_multiply_generic 2286 12.5 0.125 0.172 8.396 8.727 qs_ks_update_qs_env 119 7.6 0.001 0.001 8.281 8.442 qs_rho_update_rho_low 119 7.7 0.001 0.001 7.423 7.429 calculate_rho_elec 119 8.7 0.035 0.040 7.423 7.429 sum_up_and_integrate 119 10.3 0.001 0.003 7.051 7.058 integrate_v_rspace 119 11.3 0.003 0.003 7.039 7.046 multiply_cannon 2286 13.5 0.209 0.247 4.159 5.380 grid_collocate_task_list 119 9.7 4.994 5.204 4.994 5.204 grid_integrate_task_list 119 12.3 4.752 4.943 4.752 4.943 ot_mini 108 10.5 0.001 0.001 4.756 4.869 multiply_cannon_loop 2286 14.5 0.127 0.185 3.349 4.306 qs_ot_get_derivative 108 11.5 0.001 0.001 3.544 3.659 multiply_cannon_multrec 13716 15.5 2.023 3.286 2.036 3.299 qs_ot_get_p 119 10.4 0.001 0.001 2.611 2.763 make_m2s 4572 13.5 0.075 0.100 2.566 2.671 init_scf_loop 11 6.9 0.000 0.000 2.605 2.607 mp_waitall_1 156604 16.6 1.659 2.408 1.659 2.408 fft_wrap_pw1pw2 1201 11.6 0.014 0.018 2.197 2.211 make_images 4572 14.5 0.253 0.304 2.080 2.176 density_rs2pw 119 9.7 0.005 0.008 2.002 2.162 fft_wrap_pw1pw2_140 487 12.2 0.103 0.111 1.803 1.823 fft3d_ps 1201 13.6 0.649 0.687 1.777 1.795 potential_pw2rs 119 12.3 0.009 0.014 1.714 1.722 init_scf_run 11 5.9 0.000 0.006 1.680 1.680 scf_env_initial_rho_setup 11 6.9 0.000 0.004 1.679 1.680 prepare_preconditioner 11 7.9 0.000 0.000 1.500 1.507 make_preconditioner 11 8.9 0.000 0.000 1.500 1.507 make_full_inverse_cholesky 11 9.9 0.000 0.000 1.383 1.406 mp_alltoall_d11v 2130 13.8 1.243 1.397 1.243 1.397 wfi_extrapolate 11 7.9 0.001 0.001 1.388 1.388 qs_ot_get_derivative_diag 49 12.0 0.001 0.002 1.263 1.325 mp_sum_l 11298 13.2 0.965 1.324 0.965 1.324 qs_ot_p2m_diag 50 11.0 0.010 0.023 1.299 1.315 qs_ot_get_derivative_taylor 59 13.0 0.002 0.003 1.200 1.253 ot_diis_step 108 11.5 0.014 0.016 1.187 1.188 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.157 1.174 transfer_rs2pw 487 10.6 0.006 0.007 0.987 1.138 multiply_cannon_metrocomm3 13716 15.5 0.044 0.092 0.479 1.112 transfer_pw2rs 487 13.2 0.005 0.006 1.096 1.104 cp_dbcsr_syevd 50 12.0 0.004 0.004 1.100 1.101 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 0.937 1.062 apply_single 119 13.6 0.000 0.001 0.936 1.062 mp_alltoall_z22v 1201 15.6 0.973 1.048 0.973 1.048 mp_waitany 20872 13.8 0.844 1.036 0.844 1.036 make_images_data 4572 15.5 0.061 0.102 0.886 1.036 hybrid_alltoall_any 4725 16.4 0.071 0.196 0.747 0.954 qs_energies_init_hamiltonians 11 5.9 0.001 0.002 0.854 0.858 cp_fm_diag_elpa 50 13.0 0.000 0.000 0.812 0.812 cp_fm_redistribute_end 50 14.0 0.397 0.784 0.401 0.786 cp_fm_diag_elpa_base 50 14.0 0.360 0.731 0.383 0.768 mp_allgather_i34 2286 14.5 0.342 0.764 0.342 0.764 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 0.688 0.754 transfer_rs2pw_140 130 11.5 0.143 0.161 0.595 0.751 make_images_sizes 4572 15.5 0.006 0.014 0.604 0.712 dbcsr_complete_redistribute 329 12.2 0.109 0.127 0.673 0.709 mp_alltoall_i44 4572 16.5 0.597 0.706 0.597 0.706 rs_gather_matrices 119 12.3 0.054 0.061 0.535 0.684 transfer_pw2rs_140 130 13.9 0.186 0.208 0.637 0.683 cp_fm_cholesky_invert 11 10.9 0.664 0.671 0.664 0.671 multiply_cannon_metrocomm1 13716 15.5 0.048 0.095 0.370 0.649 mp_sum_d 4139 12.0 0.482 0.642 0.482 0.642 arnoldi_extremal 119 11.4 0.002 0.003 0.604 0.641 arnoldi_normal_ev 119 12.4 0.002 0.004 0.602 0.639 calculate_dm_sparse 119 9.5 0.001 0.001 0.536 0.624 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="106", plot="h2o_64_md", label="(4n/9r/4t)", y=31.096000, yerr=0.000000 PlotPoint: name="107", plot="h2o_64_md_mem", label="(4n/9r/4t)", y=223.545455, yerr=0.782030 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/07/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 207618048 0.0% 100.0% 0.0% flops 22 x 32 x 32 253755392 0.0% 100.0% 0.0% flops 40 x 32 x 200 487424000 0.0% 100.0% 0.0% flops 40 x 64 x 200 487424000 0.0% 100.0% 0.0% flops 62 x 32 x 200 755507200 0.0% 100.0% 0.0% flops 62 x 64 x 200 755507200 0.0% 100.0% 0.0% flops 76 x 64 x 200 926105600 0.0% 100.0% 0.0% flops 76 x 32 x 200 926105600 0.0% 100.0% 0.0% flops 40 x 32 x 209 1018716160 0.0% 100.0% 0.0% flops 40 x 64 x 209 1018716160 0.0% 100.0% 0.0% flops 40 x 32 x 222 1082081280 0.0% 100.0% 0.0% flops 40 x 64 x 222 1082081280 0.0% 100.0% 0.0% flops 111 x 32 x 200 1352601600 0.0% 100.0% 0.0% flops 111 x 64 x 200 1352601600 0.0% 100.0% 0.0% flops 40 x 32 x 32 1357905920 0.0% 100.0% 0.0% flops 40 x 64 x 32 1357905920 0.0% 100.0% 0.0% flops 62 x 32 x 209 1579010048 0.0% 100.0% 0.0% flops 62 x 64 x 209 1579010048 0.0% 100.0% 0.0% flops 62 x 32 x 222 1677225984 0.0% 100.0% 0.0% flops 62 x 64 x 222 1677225984 0.0% 100.0% 0.0% flops 76 x 64 x 209 1935560704 0.0% 100.0% 0.0% flops 76 x 32 x 209 1935560704 0.0% 100.0% 0.0% flops 76 x 64 x 222 2055954432 0.0% 100.0% 0.0% flops 76 x 32 x 222 2055954432 0.0% 100.0% 0.0% flops 85 x 64 x 200 2071552000 0.0% 100.0% 0.0% flops 85 x 32 x 200 2071552000 0.0% 100.0% 0.0% flops 62 x 32 x 32 2104754176 0.0% 100.0% 0.0% flops 62 x 64 x 32 2104754176 0.0% 100.0% 0.0% flops 98 x 32 x 200 2388377600 0.0% 100.0% 0.0% flops 98 x 64 x 200 2388377600 0.0% 100.0% 0.0% flops 76 x 64 x 32 2580021248 0.0% 100.0% 0.0% flops 76 x 32 x 32 2580021248 0.0% 100.0% 0.0% flops 64 x 64 x 200 2759065600 0.0% 100.0% 0.0% flops 64 x 32 x 200 2759065600 0.0% 100.0% 0.0% flops 111 x 32 x 209 2826937344 0.0% 100.0% 0.0% flops 111 x 64 x 209 2826937344 0.0% 100.0% 0.0% flops 120 x 32 x 200 2924544000 0.0% 100.0% 0.0% flops 120 x 64 x 200 2924544000 0.0% 100.0% 0.0% flops 111 x 32 x 222 3002775552 0.0% 100.0% 0.0% flops 111 x 64 x 222 3002775552 0.0% 100.0% 0.0% flops 64 x 64 x 32 3359637504 0.0% 100.0% 0.0% flops 64 x 32 x 32 3359637504 0.0% 100.0% 0.0% flops 111 x 32 x 32 3768188928 0.0% 100.0% 0.0% flops 111 x 64 x 32 3768188928 0.0% 100.0% 0.0% flops 85 x 64 x 209 4329543680 0.0% 100.0% 0.0% flops 85 x 32 x 209 4329543680 0.0% 100.0% 0.0% flops 89 x 64 x 200 4338073600 0.0% 100.0% 0.0% flops 89 x 32 x 200 4338073600 0.0% 100.0% 0.0% flops 85 x 64 x 222 4598845440 0.0% 100.0% 0.0% flops 85 x 32 x 222 4598845440 0.0% 100.0% 0.0% flops 98 x 32 x 209 4991709184 0.0% 100.0% 0.0% flops 98 x 64 x 209 4991709184 0.0% 100.0% 0.0% flops 98 x 32 x 222 5302198272 0.0% 100.0% 0.0% flops 98 x 64 x 222 5302198272 0.0% 100.0% 0.0% flops 64 x 64 x 209 5766447104 0.0% 100.0% 0.0% flops 64 x 32 x 209 5766447104 0.0% 100.0% 0.0% flops 85 x 64 x 32 5771100160 0.0% 100.0% 0.0% flops 85 x 32 x 32 5771100160 0.0% 100.0% 0.0% flops 120 x 32 x 209 6112296960 0.0% 100.0% 0.0% flops 120 x 64 x 209 6112296960 0.0% 100.0% 0.0% flops 64 x 64 x 222 6125125632 0.0% 100.0% 0.0% flops 64 x 32 x 222 6125125632 0.0% 100.0% 0.0% flops 120 x 32 x 222 6492487680 0.0% 100.0% 0.0% flops 120 x 64 x 222 6492487680 0.0% 100.0% 0.0% flops 98 x 32 x 32 6653739008 0.0% 100.0% 0.0% flops 98 x 64 x 32 6653739008 0.0% 100.0% 0.0% flops 120 x 32 x 32 8147435520 0.0% 100.0% 0.0% flops 120 x 64 x 32 8147435520 0.0% 100.0% 0.0% flops 32 x 32 x 200 8277196800 0.0% 100.0% 0.0% flops 32 x 64 x 200 8277196800 0.0% 100.0% 0.0% flops 89 x 64 x 209 9066573824 0.0% 100.0% 0.0% flops 89 x 32 x 209 9066573824 0.0% 100.0% 0.0% flops 89 x 64 x 222 9630523392 0.0% 100.0% 0.0% flops 89 x 32 x 222 9630523392 0.0% 100.0% 0.0% flops 32 x 32 x 32 10078912512 0.0% 100.0% 0.0% flops 32 x 64 x 32 10078912512 0.0% 100.0% 0.0% flops 89 x 64 x 32 12085362688 0.0% 100.0% 0.0% flops 89 x 32 x 32 12085362688 0.0% 100.0% 0.0% flops 32 x 32 x 209 17299341312 0.0% 100.0% 0.0% flops 32 x 64 x 209 17299341312 0.0% 100.0% 0.0% flops 32 x 32 x 222 18375376896 0.0% 100.0% 0.0% flops 32 x 64 x 222 18375376896 0.0% 100.0% 0.0% flops 9 x 9 x 32 44168260608 0.0% 100.0% 0.0% flops 22 x 9 x 32 53835724800 0.0% 100.0% 0.0% flops 9 x 22 x 32 53885500416 0.0% 100.0% 0.0% flops 22 x 22 x 32 67007283200 0.0% 100.0% 0.0% flops 9 x 32 x 9 185405884416 0.0% 100.0% 0.0% flops 22 x 32 x 9 227871249408 0.0% 100.0% 0.0% flops 9 x 32 x 22 227871249408 0.0% 100.0% 0.0% flops 22 x 32 x 22 279130931200 0.0% 100.0% 0.0% flops inhomo. stacks 409643995136 100.0% 0.0% 0.0% flops total 1.940194E+12 21.1% 78.9% 0.0% flops max/rank 126.888791E+09 24.8% 75.2% 0.0% matmuls inhomo. stacks 389676 100.0% 0.0% 0.0% matmuls total 101225376 0.4% 99.6% 0.0% number of processed stacks 3787752 10.3% 89.7% 0.0% average stack size 1.0 29.7 0.0 marketing flops 2.107592E+12 ------------------------------------------------------------------------------- # multiplications 2286 max memory usage/rank 270.249984E+06 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 1042416 MPI messages size (bytes): total size 150.443262E+09 min size 0.000000E+00 max size 1.188816E+06 average size 144.321719E+03 MPI breakdown and total messages size (bytes): size <= 128 228256 0 128 < size <= 8192 126888 1039466496 8192 < size <= 32768 191472 3137077248 32768 < size <= 131072 295800 25899827200 131072 < size <= 4194304 200000 120367247040 4194304 < size <= 16777216 0 0 16777216 < size 0 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3672 62653. MP_Allreduce 10304 342. MP_Sync 54 MP_Alltoall 1582 2412273. MP_SendRecv 8211 74133. MP_ISendRecv 8211 74133. MP_Wait 16271 MP_comm_split 50 MP_ISend 7280 135929. MP_IRecv 7280 135929. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.025 0.041 36.256 36.260 qs_mol_dyn_low 1 2.0 0.018 0.033 35.914 35.925 qs_forces 11 3.9 0.004 0.012 35.775 35.784 qs_energies 11 4.9 0.003 0.008 33.750 33.761 scf_env_do_scf 11 5.9 0.001 0.003 30.637 30.638 scf_env_do_scf_inner_loop 108 6.5 0.004 0.026 26.458 26.464 velocity_verlet 10 3.0 0.005 0.010 21.068 21.071 qs_scf_new_mos 108 7.5 0.001 0.001 10.755 10.860 qs_scf_loop_do_ot 108 8.5 0.001 0.002 10.755 10.859 dbcsr_multiply_generic 2286 12.5 0.138 0.188 10.043 10.368 ot_scf_mini 108 9.5 0.003 0.005 10.184 10.279 rebuild_ks_matrix 119 8.3 0.001 0.001 9.439 9.598 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.022 9.438 9.598 qs_rho_update_rho_low 119 7.7 0.001 0.001 9.030 9.035 calculate_rho_elec 119 8.7 0.051 0.058 9.029 9.035 qs_ks_update_qs_env 119 7.6 0.001 0.001 8.398 8.542 sum_up_and_integrate 119 10.3 0.002 0.003 6.915 6.922 integrate_v_rspace 119 11.3 0.003 0.003 6.902 6.909 grid_collocate_task_list 119 9.7 6.700 6.861 6.700 6.861 ot_mini 108 10.5 0.001 0.002 5.686 5.790 multiply_cannon 2286 13.5 0.230 0.269 4.721 5.531 grid_integrate_task_list 119 12.3 4.852 4.936 4.852 4.936 multiply_cannon_loop 2286 14.5 0.240 0.346 3.753 4.545 qs_ot_get_derivative 108 11.5 0.001 0.002 4.313 4.408 init_scf_loop 11 6.9 0.001 0.005 4.155 4.157 make_m2s 4572 13.5 0.096 0.130 3.483 3.631 multiply_cannon_multrec 27432 15.5 2.393 3.399 2.407 3.414 prepare_preconditioner 11 7.9 0.000 0.001 2.993 3.006 make_preconditioner 11 8.9 0.000 0.001 2.993 3.006 qs_ot_get_p 119 10.4 0.001 0.001 2.822 2.950 make_full_inverse_cholesky 11 9.9 0.000 0.000 2.542 2.880 make_images 4572 14.5 0.344 0.448 2.692 2.785 fft_wrap_pw1pw2 1201 11.6 0.014 0.020 2.217 2.248 density_rs2pw 119 9.7 0.005 0.008 2.033 2.190 mp_waitall_1 137007 16.6 1.450 2.177 1.450 2.177 init_scf_run 11 5.9 0.001 0.005 1.913 1.914 scf_env_initial_rho_setup 11 6.9 0.001 0.004 1.913 1.913 fft_wrap_pw1pw2_140 487 12.2 0.116 0.121 1.858 1.897 fft3d_ps 1201 13.6 0.724 0.780 1.734 1.763 potential_pw2rs 119 12.3 0.011 0.014 1.682 1.684 qs_ot_get_derivative_diag 49 12.0 0.001 0.002 1.543 1.596 wfi_extrapolate 11 7.9 0.001 0.003 1.547 1.548 qs_ot_get_derivative_taylor 59 13.0 0.002 0.003 1.468 1.516 mp_sum_l 11298 13.2 1.055 1.489 1.055 1.489 qs_ot_p2m_diag 50 11.0 0.014 0.023 1.376 1.389 ot_diis_step 108 11.5 0.016 0.019 1.337 1.337 dbcsr_complete_redistribute 329 12.2 0.157 0.180 0.973 1.256 transfer_rs2pw 487 10.6 0.005 0.006 1.040 1.231 cp_fm_upper_to_full 72 13.8 0.888 1.203 0.888 1.203 apply_preconditioner_dbcsr 119 12.6 0.000 0.001 1.056 1.199 apply_single 119 13.6 0.000 0.001 1.056 1.199 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.159 1.174 cp_dbcsr_syevd 50 12.0 0.004 0.004 1.114 1.120 transfer_pw2rs 487 13.2 0.004 0.005 1.059 1.060 make_images_data 4572 15.5 0.065 0.111 0.947 1.041 qs_energies_init_hamiltonians 11 5.9 0.001 0.004 1.001 1.012 mp_alltoall_d11v 2130 13.8 0.829 0.978 0.829 0.978 copy_fm_to_dbcsr 176 11.2 0.001 0.002 0.665 0.959 hybrid_alltoall_any 4725 16.4 0.079 0.165 0.825 0.947 mp_alltoall_z22v 1201 15.6 0.876 0.902 0.876 0.902 make_images_sizes 4572 15.5 0.006 0.014 0.754 0.876 mp_alltoall_i22 627 13.8 0.534 0.873 0.534 0.873 mp_alltoall_i44 4572 16.5 0.748 0.870 0.748 0.870 multiply_cannon_metrocomm3 27432 15.5 0.051 0.110 0.465 0.869 cp_fm_diag_elpa 50 13.0 0.000 0.000 0.854 0.855 cp_fm_cholesky_invert 11 10.9 0.836 0.845 0.836 0.845 mp_waitany 7280 13.7 0.669 0.843 0.669 0.843 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 0.757 0.833 cp_fm_redistribute_end 50 14.0 0.419 0.830 0.424 0.833 dbcsr_make_images_dense 3978 14.8 0.066 0.101 0.574 0.816 cp_fm_diag_elpa_base 50 14.0 0.365 0.747 0.407 0.815 transfer_rs2pw_140 130 11.5 0.127 0.140 0.599 0.783 transfer_fm_to_dbcsr 11 9.9 0.000 0.000 0.447 0.728 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="108", plot="h2o_64_md", label="(4n/6r/6t)", y=36.260000, yerr=0.000000 PlotPoint: name="109", plot="h2o_64_md_mem", label="(4n/6r/6t)", y=256.272727, yerr=2.561637 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/08/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 207618048 0.0% 100.0% 0.0% flops 22 x 32 x 32 253755392 0.0% 100.0% 0.0% flops 40 x 64 x 64 10863247360 0.0% 100.0% 0.0% flops 40 x 64 x 640 24956108800 0.0% 100.0% 0.0% flops 32 x 64 x 64 26877100032 0.0% 100.0% 0.0% flops 9 x 9 x 64 44168260608 0.0% 100.0% 0.0% flops 71 x 64 x 64 48205660160 0.0% 100.0% 0.0% flops 22 x 9 x 64 53835724800 0.0% 100.0% 0.0% flops 9 x 22 x 64 53885500416 0.0% 100.0% 0.0% flops 80 x 64 x 64 54316236800 0.0% 100.0% 0.0% flops 22 x 22 x 64 67007283200 0.0% 100.0% 0.0% flops 71 x 64 x 640 110742732800 0.0% 100.0% 0.0% flops 80 x 64 x 640 124780544000 0.0% 100.0% 0.0% flops 32 x 64 x 640 141264158720 0.0% 100.0% 0.0% flops 9 x 32 x 9 185405884416 0.0% 100.0% 0.0% flops 22 x 32 x 9 227871249408 0.0% 100.0% 0.0% flops 9 x 32 x 22 227871249408 0.0% 100.0% 0.0% flops 22 x 32 x 22 279130931200 0.0% 100.0% 0.0% flops inhomo. stacks 261929041920 100.0% 0.0% 0.0% flops total 1.943572E+12 13.5% 86.5% 0.0% flops max/rank 122.902337E+09 13.9% 86.1% 0.0% matmuls inhomo. stacks 122304 100.0% 0.0% 0.0% matmuls total 90872996 0.1% 99.9% 0.0% number of processed stacks 1449216 8.4% 91.6% 0.0% average stack size 1.0 68.4 0.0 marketing flops 2.107587E+12 ------------------------------------------------------------------------------- # multiplications 2286 max memory usage/rank 325.853184E+06 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 219456 MPI messages size (bytes): total size 97.042514E+09 min size 0.000000E+00 max size 3.276800E+06 average size 442.195750E+03 MPI breakdown and total messages size (bytes): size <= 128 1452 0 128 < size <= 8192 0 0 8192 < size <= 32768 101892 3336634368 32768 < size <= 131072 0 0 131072 < size <= 4194304 116112 93705670464 4194304 < size <= 16777216 0 0 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 19 12. MP_Allreduce 12177 16. MP_Alltoall 8655 62672. MP_ISend 36532 167957. MP_IRecv 36532 167930. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3622 63488. MP_Allreduce 10154 346. MP_Sync 54 MP_Alltoall 1582 3682667. MP_SendRecv 5355 94533. MP_ISendRecv 5355 94533. MP_Wait 11335 MP_ISend 5200 225425. MP_IRecv 5200 225425. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.068 34.629 34.631 qs_mol_dyn_low 1 2.0 0.011 0.019 34.231 34.237 qs_forces 11 3.9 0.002 0.003 34.104 34.113 qs_energies 11 4.9 0.001 0.002 32.098 32.111 scf_env_do_scf 11 5.9 0.001 0.002 28.893 28.893 scf_env_do_scf_inner_loop 108 6.5 0.004 0.024 25.521 25.522 velocity_verlet 10 3.0 0.003 0.007 19.761 19.766 qs_scf_new_mos 108 7.5 0.001 0.001 9.826 9.857 qs_scf_loop_do_ot 108 8.5 0.001 0.001 9.826 9.856 rebuild_ks_matrix 119 8.3 0.001 0.001 9.373 9.418 qs_ks_build_kohn_sham_matrix 119 9.3 0.014 0.016 9.372 9.418 ot_scf_mini 108 9.5 0.003 0.003 9.303 9.323 qs_rho_update_rho_low 119 7.7 0.001 0.001 9.066 9.070 calculate_rho_elec 119 8.7 0.075 0.078 9.065 9.070 dbcsr_multiply_generic 2286 12.5 0.143 0.200 8.656 8.764 qs_ks_update_qs_env 119 7.6 0.001 0.001 8.340 8.382 sum_up_and_integrate 119 10.3 0.001 0.002 6.763 6.778 integrate_v_rspace 119 11.3 0.003 0.003 6.752 6.766 grid_collocate_task_list 119 9.7 6.077 6.447 6.077 6.447 ot_mini 108 10.5 0.001 0.001 4.959 4.990 grid_integrate_task_list 119 12.3 4.507 4.676 4.507 4.676 multiply_cannon 2286 13.5 0.221 0.257 4.008 4.468 qs_ot_get_derivative 108 11.5 0.001 0.002 3.595 3.616 density_rs2pw 119 9.7 0.005 0.008 2.637 3.460 init_scf_loop 11 6.9 0.000 0.000 3.347 3.349 multiply_cannon_loop 2286 14.5 0.101 0.151 3.122 3.347 make_m2s 4572 13.5 0.075 0.109 2.996 3.155 qs_ot_get_p 119 10.4 0.001 0.001 2.906 2.947 fft_wrap_pw1pw2 1201 11.6 0.013 0.016 2.872 2.941 make_images 4572 14.5 0.336 0.370 2.352 2.524 fft_wrap_pw1pw2_140 487 12.2 0.150 0.169 2.364 2.507 multiply_cannon_multrec 9144 15.5 2.222 2.381 2.238 2.394 fft3d_ps 1201 13.6 0.832 1.014 2.326 2.377 prepare_preconditioner 11 7.9 0.000 0.000 2.220 2.224 make_preconditioner 11 8.9 0.000 0.000 2.220 2.224 make_full_inverse_cholesky 11 9.9 0.000 0.000 2.068 2.119 transfer_rs2pw 487 10.6 0.006 0.007 1.217 1.989 init_scf_run 11 5.9 0.001 0.005 1.903 1.904 scf_env_initial_rho_setup 11 6.9 0.000 0.003 1.903 1.904 mp_waitall_1 115863 16.7 1.391 1.736 1.391 1.736 potential_pw2rs 119 12.3 0.014 0.018 1.695 1.700 mp_waitany 5200 13.7 0.834 1.601 0.834 1.601 transfer_rs2pw_140 130 11.5 0.117 0.142 0.778 1.546 wfi_extrapolate 11 7.9 0.001 0.001 1.525 1.525 qs_ot_p2m_diag 50 11.0 0.020 0.025 1.450 1.462 mp_alltoall_z22v 1201 15.6 1.324 1.436 1.324 1.436 ot_diis_step 108 11.5 0.018 0.023 1.336 1.338 mp_alltoall_d11v 2130 13.8 1.021 1.303 1.021 1.303 qs_ot_get_derivative_diag 49 12.0 0.001 0.002 1.255 1.271 cp_dbcsr_syevd 50 12.0 0.004 0.004 1.224 1.225 qs_ot_get_derivative_taylor 59 13.0 0.002 0.003 1.196 1.207 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.186 1.192 make_images_data 4572 15.5 0.061 0.095 0.920 1.107 qs_energies_init_hamiltonians 11 5.9 0.001 0.002 1.070 1.079 apply_preconditioner_dbcsr 119 12.6 0.000 0.000 1.006 1.070 apply_single 119 13.6 0.000 0.001 1.006 1.070 cp_fm_cholesky_invert 11 10.9 1.013 1.022 1.013 1.022 hybrid_alltoall_any 4725 16.4 0.089 0.215 0.834 0.993 mp_sum_l 11298 13.2 0.733 0.979 0.733 0.979 cp_fm_diag_elpa 50 13.0 0.000 0.000 0.959 0.960 cp_fm_diag_elpa_base 50 14.0 0.910 0.928 0.953 0.954 transfer_pw2rs 487 13.2 0.004 0.005 0.928 0.932 mp_allgather_i34 2286 14.5 0.426 0.926 0.426 0.926 make_images_sizes 4572 15.5 0.007 0.015 0.706 0.923 mp_alltoall_i44 4572 16.5 0.698 0.917 0.698 0.917 arnoldi_extremal 119 11.4 0.002 0.003 0.736 0.832 arnoldi_normal_ev 119 12.4 0.003 0.004 0.734 0.829 yz_to_x 606 14.1 0.075 0.098 0.780 0.825 dbcsr_complete_redistribute 329 12.2 0.214 0.236 0.772 0.824 rs_gather_matrices 119 12.3 0.055 0.067 0.511 0.817 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 0.693 0.753 x_to_yz 595 15.2 0.082 0.125 0.701 0.738 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="110", plot="h2o_64_md", label="(4n/4r/9t)", y=34.631000, yerr=0.000000 PlotPoint: name="111", plot="h2o_64_md_mem", label="(4n/4r/9t)", y=308.545455, yerr=4.142104 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/09/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 207618048 0.0% 100.0% 0.0% flops 22 x 32 x 32 253755392 0.0% 100.0% 0.0% flops 89 x 32 x 409 2217840128 0.0% 100.0% 0.0% flops 89 x 64 x 409 2217840128 0.0% 100.0% 0.0% flops 89 x 32 x 418 2266643456 0.0% 100.0% 0.0% flops 89 x 64 x 418 2266643456 0.0% 100.0% 0.0% flops 107 x 64 x 409 2666392064 0.0% 100.0% 0.0% flops 107 x 32 x 409 2666392064 0.0% 100.0% 0.0% flops 107 x 64 x 418 2725065728 0.0% 100.0% 0.0% flops 107 x 32 x 418 2725065728 0.0% 100.0% 0.0% flops 120 x 32 x 409 2990346240 0.0% 100.0% 0.0% flops 120 x 64 x 409 2990346240 0.0% 100.0% 0.0% flops 89 x 32 x 32 3021340672 0.0% 100.0% 0.0% flops 89 x 32 x 64 3021340672 0.0% 100.0% 0.0% flops 89 x 64 x 32 3021340672 0.0% 100.0% 0.0% flops 89 x 64 x 64 3021340672 0.0% 100.0% 0.0% flops 120 x 32 x 418 3056148480 0.0% 100.0% 0.0% flops 120 x 64 x 418 3056148480 0.0% 100.0% 0.0% flops 142 x 64 x 409 3538576384 0.0% 100.0% 0.0% flops 142 x 32 x 409 3538576384 0.0% 100.0% 0.0% flops 142 x 64 x 418 3616442368 0.0% 100.0% 0.0% flops 142 x 32 x 418 3616442368 0.0% 100.0% 0.0% flops 107 x 64 x 64 3632398336 0.0% 100.0% 0.0% flops 107 x 64 x 32 3632398336 0.0% 100.0% 0.0% flops 107 x 32 x 32 3632398336 0.0% 100.0% 0.0% flops 107 x 32 x 64 3632398336 0.0% 100.0% 0.0% flops 120 x 32 x 32 4073717760 0.0% 100.0% 0.0% flops 120 x 32 x 64 4073717760 0.0% 100.0% 0.0% flops 120 x 64 x 32 4073717760 0.0% 100.0% 0.0% flops 120 x 64 x 64 4073717760 0.0% 100.0% 0.0% flops 142 x 64 x 64 4820566016 0.0% 100.0% 0.0% flops 142 x 64 x 32 4820566016 0.0% 100.0% 0.0% flops 142 x 32 x 32 4820566016 0.0% 100.0% 0.0% flops 142 x 32 x 64 4820566016 0.0% 100.0% 0.0% flops 111 x 64 x 409 5532140544 0.0% 100.0% 0.0% flops 111 x 32 x 409 5532140544 0.0% 100.0% 0.0% flops 111 x 64 x 418 5653874688 0.0% 100.0% 0.0% flops 111 x 32 x 418 5653874688 0.0% 100.0% 0.0% flops 32 x 64 x 64 6719275008 0.0% 100.0% 0.0% flops 32 x 64 x 32 6719275008 0.0% 100.0% 0.0% flops 32 x 32 x 32 6719275008 0.0% 100.0% 0.0% flops 32 x 32 x 64 6719275008 0.0% 100.0% 0.0% flops 89 x 32 x 431 7011411456 0.0% 100.0% 0.0% flops 89 x 64 x 431 7011411456 0.0% 100.0% 0.0% flops 111 x 64 x 64 7536377856 0.0% 100.0% 0.0% flops 111 x 64 x 32 7536377856 0.0% 100.0% 0.0% flops 111 x 32 x 32 7536377856 0.0% 100.0% 0.0% flops 111 x 32 x 64 7536377856 0.0% 100.0% 0.0% flops 107 x 64 x 431 8429449728 0.0% 100.0% 0.0% flops 107 x 32 x 431 8429449728 0.0% 100.0% 0.0% flops 120 x 32 x 431 9453588480 0.0% 100.0% 0.0% flops 120 x 64 x 431 9453588480 0.0% 100.0% 0.0% flops 142 x 64 x 431 11186746368 0.0% 100.0% 0.0% flops 142 x 32 x 431 11186746368 0.0% 100.0% 0.0% flops 32 x 64 x 409 11284578304 0.0% 100.0% 0.0% flops 32 x 32 x 409 11284578304 0.0% 100.0% 0.0% flops 32 x 64 x 418 11532894208 0.0% 100.0% 0.0% flops 32 x 32 x 418 11532894208 0.0% 100.0% 0.0% flops 111 x 64 x 431 17489138688 0.0% 100.0% 0.0% flops 111 x 32 x 431 17489138688 0.0% 100.0% 0.0% flops 9 x 9 x 64 22084130304 0.0% 100.0% 0.0% flops 9 x 9 x 32 22084130304 0.0% 100.0% 0.0% flops 22 x 9 x 64 26917862400 0.0% 100.0% 0.0% flops 22 x 9 x 32 26917862400 0.0% 100.0% 0.0% flops 9 x 22 x 64 26942750208 0.0% 100.0% 0.0% flops 9 x 22 x 32 26942750208 0.0% 100.0% 0.0% flops 22 x 22 x 64 33503641600 0.0% 100.0% 0.0% flops 22 x 22 x 32 33503641600 0.0% 100.0% 0.0% flops 32 x 64 x 431 35674718208 0.0% 100.0% 0.0% flops 32 x 32 x 431 35674718208 0.0% 100.0% 0.0% flops 9 x 32 x 9 185405884416 0.0% 100.0% 0.0% flops 22 x 32 x 9 227871249408 0.0% 100.0% 0.0% flops 9 x 32 x 22 227871249408 0.0% 100.0% 0.0% flops 22 x 32 x 22 279130931200 0.0% 100.0% 0.0% flops inhomo. stacks 555803951104 100.0% 0.0% 0.0% flops total 2.107308E+12 26.4% 73.6% 0.0% flops max/rank 262.290333E+09 28.3% 71.7% 0.0% matmuls inhomo. stacks 303960 100.0% 0.0% 0.0% matmuls total 96047982 0.3% 99.7% 0.0% number of processed stacks 2272806 13.4% 86.6% 0.0% average stack size 1.0 48.6 0.0 marketing flops 2.107592E+12 ------------------------------------------------------------------------------- # multiplications 2286 max memory usage/rank 400.797696E+06 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 219456 MPI messages size (bytes): total size 139.149754E+09 min size 0.000000E+00 max size 4.537280E+06 average size 634.066750E+03 MPI breakdown and total messages size (bytes): size <= 128 1386 0 128 < size <= 8192 21148 173244416 8192 < size <= 32768 58442 1568899072 32768 < size <= 131072 38700 3527147520 131072 < size <= 4194304 78110 38969991440 4194304 < size <= 16777216 21670 94910778800 16777216 < size 0 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3672 62884. MP_Allreduce 10304 424. MP_Sync 54 MP_Alltoall 1582 4823651. MP_SendRecv 3927 131600. MP_ISendRecv 3927 131600. MP_Wait 8867 MP_comm_split 50 MP_ISend 4160 325000. MP_IRecv 4160 325000. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.037 0.056 41.754 41.767 qs_mol_dyn_low 1 2.0 0.015 0.020 41.319 41.325 qs_forces 11 3.9 0.006 0.011 41.195 41.207 qs_energies 11 4.9 0.004 0.007 39.004 39.017 scf_env_do_scf 11 5.9 0.001 0.003 35.130 35.130 scf_env_do_scf_inner_loop 108 6.5 0.005 0.027 28.930 28.930 velocity_verlet 10 3.0 0.003 0.006 24.746 24.752 qs_scf_new_mos 108 7.5 0.001 0.001 11.461 11.540 qs_scf_loop_do_ot 108 8.5 0.001 0.002 11.460 11.539 dbcsr_multiply_generic 2286 12.5 0.141 0.147 10.822 10.955 ot_scf_mini 108 9.5 0.003 0.004 10.850 10.927 rebuild_ks_matrix 119 8.3 0.000 0.001 10.212 10.322 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.017 10.211 10.322 qs_rho_update_rho_low 119 7.7 0.001 0.001 10.232 10.239 calculate_rho_elec 119 8.7 0.100 0.106 10.231 10.238 qs_ks_update_qs_env 119 7.6 0.001 0.001 9.191 9.283 grid_collocate_task_list 119 9.7 7.517 7.646 7.517 7.646 sum_up_and_integrate 119 10.3 0.001 0.002 7.012 7.018 integrate_v_rspace 119 11.3 0.003 0.003 7.000 7.006 multiply_cannon 2286 13.5 0.233 0.245 5.808 6.763 init_scf_loop 11 6.9 0.002 0.004 6.168 6.170 ot_mini 108 10.5 0.004 0.012 5.615 5.701 multiply_cannon_loop 2286 14.5 0.138 0.141 4.778 5.282 grid_integrate_task_list 119 12.3 4.954 5.053 4.954 5.053 prepare_preconditioner 11 7.9 0.000 0.000 4.894 4.898 make_preconditioner 11 8.9 0.001 0.001 4.894 4.898 make_full_inverse_cholesky 11 9.9 0.000 0.000 4.188 4.777 mp_waitall_1 114435 16.7 2.964 4.324 2.964 4.324 make_m2s 4572 13.5 0.083 0.086 3.461 3.775 qs_ot_get_derivative 108 11.5 0.001 0.002 3.549 3.629 multiply_cannon_multrec 13716 15.5 2.312 3.537 2.330 3.556 qs_ot_get_p 119 10.4 0.001 0.001 3.139 3.229 multiply_cannon_metrocomm3 13716 15.5 0.044 0.045 1.743 3.046 make_images 4572 14.5 0.458 0.501 2.525 2.827 fft_wrap_pw1pw2 1201 11.6 0.014 0.016 2.649 2.684 density_rs2pw 119 9.7 0.005 0.005 2.282 2.490 cp_fm_upper_to_full 72 13.8 1.893 2.443 1.893 2.443 init_scf_run 11 5.9 0.001 0.004 2.290 2.291 scf_env_initial_rho_setup 11 6.9 0.001 0.003 2.289 2.291 fft_wrap_pw1pw2_140 487 12.2 0.174 0.176 2.250 2.283 fft3d_ps 1201 13.6 0.895 0.962 2.062 2.099 ot_diis_step 108 11.5 0.018 0.019 2.046 2.049 dbcsr_complete_redistribute 329 12.2 0.274 0.302 1.428 1.891 qs_ot_p2m_diag 50 11.0 0.027 0.042 1.850 1.867 wfi_extrapolate 11 7.9 0.001 0.001 1.835 1.835 apply_preconditioner_dbcsr 119 12.6 0.000 0.001 1.716 1.835 apply_single 119 13.6 0.000 0.000 1.716 1.835 cp_dbcsr_syevd 50 12.0 0.004 0.004 1.652 1.653 make_images_data 4572 15.5 0.063 0.066 1.294 1.584 hybrid_alltoall_any 4725 16.4 0.098 0.160 1.221 1.581 potential_pw2rs 119 12.3 0.016 0.017 1.549 1.554 copy_fm_to_dbcsr 176 11.2 0.001 0.001 0.981 1.443 cp_fm_cholesky_invert 11 10.9 1.418 1.434 1.418 1.434 cp_fm_diag_elpa 50 13.0 0.000 0.000 1.364 1.367 qs_energies_init_hamiltonians 11 5.9 0.002 0.004 1.347 1.360 cp_fm_redistribute_end 50 14.0 0.662 1.312 0.665 1.314 cp_fm_diag_elpa_base 50 14.0 0.467 0.996 0.645 1.293 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.188 1.229 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.216 1.228 mp_alltoall_i22 627 13.8 0.725 1.218 0.725 1.218 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 1.147 1.187 transfer_rs2pw 487 10.6 0.006 0.006 0.966 1.186 transfer_fm_to_dbcsr 11 9.9 0.000 0.000 0.699 1.185 mp_alltoall_d11v 2130 13.8 1.074 1.168 1.074 1.168 multiply_cannon_metrocomm4 11430 15.5 0.043 0.045 0.474 1.150 mp_alltoall_z22v 1201 15.6 1.026 1.057 1.026 1.057 mp_irecv_dv 29167 16.4 0.405 1.049 0.405 1.049 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 0.870 0.933 mp_allgather_i34 2286 14.5 0.384 0.925 0.384 0.925 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.847 0.849 transfer_pw2rs 487 13.2 0.004 0.004 0.842 0.846 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="112", plot="h2o_64_md", label="(4n/3r/12t)", y=41.767000, yerr=0.000000 PlotPoint: name="113", plot="h2o_64_md_mem", label="(4n/3r/12t)", y=375.727273, yerr=6.916264 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/10/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 207618048 0.0% 100.0% 0.0% flops 22 x 32 x 32 253755392 0.0% 100.0% 0.0% flops 58 x 64 x 64 15751708672 0.0% 100.0% 0.0% flops 32 x 64 x 64 26877100032 0.0% 100.0% 0.0% flops 58 x 64 x 640 36186357760 0.0% 100.0% 0.0% flops 71 x 64 x 64 38564528128 0.0% 100.0% 0.0% flops 80 x 64 x 64 43452989440 0.0% 100.0% 0.0% flops 9 x 9 x 64 44168260608 0.0% 100.0% 0.0% flops 22 x 9 x 64 53835724800 0.0% 100.0% 0.0% flops 9 x 22 x 64 53885500416 0.0% 100.0% 0.0% flops 22 x 22 x 64 67007283200 0.0% 100.0% 0.0% flops 71 x 64 x 640 88594186240 0.0% 100.0% 0.0% flops 80 x 64 x 640 99824435200 0.0% 100.0% 0.0% flops 32 x 64 x 640 141264158720 0.0% 100.0% 0.0% flops 9 x 32 x 9 185405884416 0.0% 100.0% 0.0% flops 22 x 32 x 9 227871249408 0.0% 100.0% 0.0% flops 9 x 32 x 22 227871249408 0.0% 100.0% 0.0% flops 22 x 32 x 22 279130931200 0.0% 100.0% 0.0% flops inhomo. stacks 342522593280 100.0% 0.0% 0.0% flops total 1.972676E+12 17.4% 82.6% 0.0% flops max/rank 253.383579E+09 18.0% 82.0% 0.0% matmuls inhomo. stacks 152880 100.0% 0.0% 0.0% matmuls total 90862804 0.2% 99.8% 0.0% number of processed stacks 1405732 10.9% 89.1% 0.0% average stack size 1.0 72.4 0.0 marketing flops 2.107587E+12 ------------------------------------------------------------------------------- # multiplications 2286 max memory usage/rank 1.015849E+09 # max total images/rank 2 # max 3D layers 1 # MPI messages exchanged 91440 MPI messages size (bytes): total size 85.748679E+09 min size 0.000000E+00 max size 6.553600E+06 average size 937.758938E+03 MPI breakdown and total messages size (bytes): size <= 128 572 0 128 < size <= 8192 0 0 8192 < size <= 32768 21148 692256768 32768 < size <= 131072 19224 1259864064 131072 < size <= 4194304 41040 21941452800 4194304 < size <= 16777216 9456 61855174464 16777216 < size 0 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3622 63723. MP_Allreduce 10154 429. MP_Sync 54 MP_Alltoall 1582 7383731. MP_SendRecv 2499 189067. MP_ISendRecv 2499 189067. MP_Wait 6399 MP_ISend 3120 546875. MP_IRecv 3120 546875. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.071 0.091 47.679 47.681 qs_mol_dyn_low 1 2.0 0.022 0.023 47.253 47.261 qs_forces 11 3.9 0.009 0.010 47.114 47.119 qs_energies 11 4.9 0.011 0.015 44.776 44.782 scf_env_do_scf 11 5.9 0.003 0.005 40.192 40.193 scf_env_do_scf_inner_loop 108 6.5 0.006 0.024 31.025 31.027 velocity_verlet 10 3.0 0.002 0.003 29.336 29.344 qs_scf_new_mos 108 7.5 0.001 0.001 12.273 12.353 qs_scf_loop_do_ot 108 8.5 0.001 0.001 12.272 12.352 ot_scf_mini 108 9.5 0.004 0.004 11.582 11.645 dbcsr_multiply_generic 2286 12.5 0.149 0.196 11.532 11.572 qs_rho_update_rho_low 119 7.7 0.001 0.001 11.357 11.362 calculate_rho_elec 119 8.7 0.149 0.153 11.356 11.361 rebuild_ks_matrix 119 8.3 0.000 0.001 10.624 10.663 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.017 10.623 10.662 qs_ks_update_qs_env 119 7.6 0.001 0.001 9.563 9.593 init_scf_loop 11 6.9 0.003 0.004 9.131 9.133 grid_collocate_task_list 119 9.7 8.197 8.392 8.197 8.392 prepare_preconditioner 11 7.9 0.000 0.000 7.739 7.764 make_preconditioner 11 8.9 0.001 0.002 7.739 7.764 make_full_inverse_cholesky 11 9.9 0.000 0.000 6.291 7.606 sum_up_and_integrate 119 10.3 0.001 0.002 7.168 7.179 integrate_v_rspace 119 11.3 0.003 0.003 7.155 7.165 ot_mini 108 10.5 0.001 0.001 6.317 6.384 multiply_cannon 2286 13.5 0.244 0.261 5.598 6.377 grid_integrate_task_list 119 12.3 5.032 5.102 5.032 5.102 multiply_cannon_loop 2286 14.5 0.103 0.129 4.591 4.723 cp_fm_upper_to_full 72 14.2 3.105 4.448 3.105 4.448 make_m2s 4572 13.5 0.077 0.105 4.111 4.420 qs_ot_get_derivative 108 11.5 0.002 0.002 4.249 4.314 mp_waitall_1 94719 16.7 2.978 3.986 2.978 3.986 make_images 4572 14.5 0.538 0.567 3.111 3.452 qs_ot_get_p 119 10.4 0.001 0.001 3.171 3.244 dbcsr_complete_redistribute 329 12.2 0.343 0.366 2.258 3.075 fft_wrap_pw1pw2 1201 11.6 0.014 0.016 2.931 2.966 multiply_cannon_multrec 9144 15.5 2.461 2.797 2.484 2.816 density_rs2pw 119 9.7 0.005 0.008 2.590 2.762 init_scf_run 11 5.9 0.001 0.005 2.592 2.592 scf_env_initial_rho_setup 11 6.9 0.002 0.005 2.591 2.592 copy_fm_to_dbcsr 176 11.2 0.001 0.001 1.751 2.572 fft_wrap_pw1pw2_140 487 12.2 0.246 0.247 2.496 2.536 mp_alltoall_i22 627 13.8 1.477 2.321 1.477 2.321 fft3d_ps 1201 13.6 1.060 1.083 2.229 2.267 transfer_fm_to_dbcsr 11 9.9 0.000 0.001 1.439 2.258 multiply_cannon_metrocomm3 9144 15.5 0.030 0.056 1.580 2.257 wfi_extrapolate 11 7.9 0.001 0.003 2.121 2.122 ot_diis_step 108 11.5 0.026 0.028 2.045 2.047 cp_fm_cholesky_invert 11 10.9 1.926 1.933 1.926 1.933 hybrid_alltoall_any 4725 16.4 0.117 0.189 1.426 1.815 make_images_data 4572 15.5 0.066 0.112 1.436 1.801 qs_energies_init_hamiltonians 11 5.9 0.002 0.002 1.711 1.716 apply_preconditioner_dbcsr 119 12.6 0.001 0.001 1.669 1.712 apply_single 119 13.6 0.000 0.001 1.669 1.711 qs_ot_p2m_diag 50 11.0 0.039 0.043 1.657 1.667 potential_pw2rs 119 12.3 0.018 0.021 1.597 1.599 qs_ot_get_derivative_taylor 59 13.0 0.002 0.003 1.429 1.456 qs_ot_get_derivative_diag 49 12.0 0.001 0.002 1.422 1.455 mp_alltoall_d11v 2130 13.8 1.357 1.412 1.357 1.412 cp_dbcsr_syevd 50 12.0 0.003 0.004 1.401 1.406 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.314 1.324 transfer_rs2pw 487 10.6 0.007 0.007 0.978 1.133 mp_allgather_i34 2286 14.5 0.456 1.122 0.456 1.122 cp_fm_diag_elpa 50 13.0 0.000 0.000 1.099 1.100 cp_fm_diag_elpa_base 50 14.0 0.972 0.992 1.095 1.095 qs_env_update_s_mstruct 11 6.9 0.001 0.004 0.992 1.037 mp_alltoall_z22v 1201 15.6 1.015 1.035 1.015 1.035 copy_dbcsr_to_fm 153 11.3 0.003 0.003 0.944 0.982 cp_dbcsr_sm_fm_multiply 37 9.5 0.001 0.001 0.971 0.978 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 0.935 0.958 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="114", plot="h2o_64_md", label="(4n/2r/18t)", y=47.681000, yerr=0.000000 PlotPoint: name="115", plot="h2o_64_md_mem", label="(4n/2r/18t)", y=827.090909, yerr=118.094303 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/11/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 207618048 0.0% 100.0% 0.0% flops 22 x 32 x 32 253755392 0.0% 100.0% 0.0% flops 18 x 128 x 128 8554807296 0.0% 100.0% 0.0% flops 18 x 128 x 1280 19652935680 0.0% 100.0% 0.0% flops 53 x 128 x 128 21590704128 0.0% 100.0% 0.0% flops 32 x 128 x 128 26877100032 0.0% 100.0% 0.0% flops 31 x 128 x 128 31571312640 0.0% 100.0% 0.0% flops 9 x 9 x 128 44168260608 0.0% 100.0% 0.0% flops 40 x 128 x 128 46168801280 0.0% 100.0% 0.0% flops 53 x 128 x 1280 49600266240 0.0% 100.0% 0.0% flops 22 x 9 x 128 53835724800 0.0% 100.0% 0.0% flops 9 x 22 x 128 53885500416 0.0% 100.0% 0.0% flops 22 x 22 x 128 67007283200 0.0% 100.0% 0.0% flops 31 x 128 x 1280 72528691200 0.0% 100.0% 0.0% flops 40 x 128 x 1280 106063462400 0.0% 100.0% 0.0% flops 32 x 128 x 1280 141264158720 0.0% 100.0% 0.0% flops 9 x 32 x 9 185405884416 0.0% 100.0% 0.0% flops 22 x 32 x 9 227871249408 0.0% 100.0% 0.0% flops 9 x 32 x 22 227871249408 0.0% 100.0% 0.0% flops 22 x 32 x 22 279130931200 0.0% 100.0% 0.0% flops inhomo. stacks 320807108608 100.0% 0.0% 0.0% flops total 1.984317E+12 16.2% 83.8% 0.0% flops max/rank 515.157433E+09 16.2% 83.8% 0.0% matmuls inhomo. stacks 63700 100.0% 0.0% 0.0% matmuls total 85771122 0.1% 99.9% 0.0% number of processed stacks 614366 10.4% 89.6% 0.0% average stack size 1.0 155.6 0.0 marketing flops 2.107587E+12 ------------------------------------------------------------------------------- # multiplications 2286 max memory usage/rank 4.465025E+09 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 18288 MPI messages size (bytes): total size 32.347431E+09 min size 0.000000E+00 max size 13.107200E+06 average size 1.768779E+06 MPI breakdown and total messages size (bytes): size <= 128 110 0 128 < size <= 8192 0 0 8192 < size <= 32768 22 720896 32768 < size <= 131072 8480 1111490560 131072 < size <= 4194304 8100 10616832000 4194304 < size <= 16777216 1576 20618391488 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 21 12. MP_Allreduce 12181 16. MP_Alltoall 8655 275585. MP_ISend 18244 454716. MP_IRecv 18244 454782. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3611 63917. MP_Allreduce 10132 514. MP_Sync 54 MP_Alltoall 1201 14305296. MP_SendRecv 1461 1332738. MP_ISendRecv 1461 1332738. MP_Wait 1461 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.082 0.089 56.530 56.533 qs_mol_dyn_low 1 2.0 0.018 0.019 56.015 56.021 qs_forces 11 3.9 0.009 0.010 55.683 55.688 qs_energies 11 4.9 0.006 0.007 52.368 52.374 scf_env_do_scf 11 5.9 0.002 0.004 46.113 46.114 scf_env_do_scf_inner_loop 108 6.5 0.008 0.025 38.661 38.665 velocity_verlet 10 3.0 0.002 0.003 34.470 34.534 qs_rho_update_rho_low 119 7.7 0.001 0.001 16.811 16.823 calculate_rho_elec 119 8.7 0.318 0.324 16.810 16.822 qs_scf_new_mos 108 7.5 0.001 0.001 13.959 13.973 qs_scf_loop_do_ot 108 8.5 0.001 0.001 13.958 13.972 ot_scf_mini 108 9.5 0.003 0.004 13.089 13.097 grid_collocate_task_list 119 9.7 12.351 12.470 12.351 12.470 dbcsr_multiply_generic 2286 12.5 0.155 0.161 11.946 12.325 rebuild_ks_matrix 119 8.3 0.000 0.001 11.844 11.866 qs_ks_build_kohn_sham_matrix 119 9.3 0.015 0.015 11.844 11.865 qs_ks_update_qs_env 119 7.6 0.001 0.001 10.764 10.787 sum_up_and_integrate 119 10.3 0.001 0.002 7.536 7.777 integrate_v_rspace 119 11.3 0.117 0.120 7.522 7.763 init_scf_loop 11 6.9 0.006 0.007 7.400 7.402 ot_mini 108 10.5 0.002 0.002 7.117 7.124 prepare_preconditioner 11 7.9 0.000 0.000 5.913 5.924 make_preconditioner 11 8.9 0.001 0.001 5.913 5.924 multiply_cannon 2286 13.5 0.566 0.571 4.958 5.775 make_full_inverse_cholesky 11 9.9 0.000 0.000 5.482 5.686 grid_integrate_task_list 119 12.3 5.092 5.332 5.092 5.332 qs_ot_get_derivative 108 11.5 0.002 0.002 4.963 4.970 make_m2s 4572 13.5 0.064 0.066 4.425 4.853 fft_wrap_pw1pw2 1201 11.6 0.014 0.014 4.437 4.456 density_rs2pw 119 9.7 0.005 0.005 4.132 4.252 qs_ot_get_p 119 10.4 0.001 0.001 4.045 4.069 fft_wrap_pw1pw2_140 487 12.2 0.468 0.471 3.836 3.854 multiply_cannon_loop 2286 14.5 0.064 0.070 3.561 3.647 make_images 4572 14.5 0.708 0.737 3.133 3.579 fft3d_ps 1201 13.6 1.682 1.712 3.359 3.377 init_scf_run 11 5.9 0.001 0.004 3.232 3.232 scf_env_initial_rho_setup 11 6.9 0.002 0.002 3.230 3.232 cp_fm_cholesky_invert 11 10.9 3.158 3.167 3.158 3.167 multiply_cannon_multrec 4572 15.5 2.894 2.960 2.924 2.989 wfi_extrapolate 11 7.9 0.001 0.001 2.723 2.724 mp_waitall_1 74613 16.8 2.075 2.713 2.075 2.713 qs_energies_init_hamiltonians 11 5.9 0.001 0.001 2.625 2.631 potential_pw2rs 119 12.3 0.028 0.028 2.313 2.315 hybrid_alltoall_any 4725 16.4 0.165 0.319 1.725 2.219 ot_diis_step 108 11.5 0.033 0.033 2.143 2.143 qs_ot_p2m_diag 50 11.0 0.075 0.080 2.106 2.112 make_images_data 4572 15.5 0.055 0.058 1.480 2.065 transfer_rs2pw 487 10.6 0.008 0.008 1.802 1.929 cp_dbcsr_syevd 50 12.0 0.004 0.004 1.808 1.810 qs_ot_get_derivative_taylor 59 13.0 0.002 0.002 1.733 1.739 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.648 1.701 dbcsr_complete_redistribute 329 12.2 0.585 0.602 1.549 1.643 qs_ot_get_derivative_diag 49 12.0 0.001 0.001 1.605 1.615 apply_preconditioner_dbcsr 119 12.6 0.001 0.001 1.538 1.585 apply_single 119 13.6 0.000 0.000 1.537 1.584 qs_env_update_s_mstruct 11 6.9 0.001 0.001 1.489 1.538 transfer_rs2pw_140 130 11.5 0.889 0.903 1.387 1.527 mp_alltoall_z22v 1201 15.6 1.505 1.525 1.505 1.525 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.488 1.493 cp_fm_diag_elpa 50 13.0 0.000 0.000 1.401 1.401 cp_fm_diag_elpa_base 50 14.0 1.242 1.272 1.399 1.399 copy_dbcsr_to_fm 153 11.3 0.003 0.003 1.373 1.377 mp_allgather_i34 2286 14.5 0.485 1.274 0.485 1.274 dbcsr_make_dense_low 5837 15.5 0.050 0.050 1.193 1.218 cp_fm_cholesky_decompose 22 10.9 1.190 1.195 1.190 1.195 transfer_pw2rs 487 13.2 0.003 0.003 1.190 1.192 qs_create_task_list 11 7.9 0.000 0.000 1.123 1.156 generate_qs_task_list 11 8.9 0.678 0.698 1.123 1.155 make_dense_data 5837 16.5 0.742 0.823 1.123 1.148 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="116", plot="h2o_64_md", label="(4n/1r/36t)", y=56.533000, yerr=0.000000 PlotPoint: name="117", plot="h2o_64_md_mem", label="(4n/1r/36t)", y=3127.727273, yerr=915.454193 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/12/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 830472192 0.0% 100.0% 0.0% flops 22 x 32 x 32 1015021568 0.0% 100.0% 0.0% flops 64 x 64 x 64 23051894784 0.0% 100.0% 0.0% flops 64 x 64 x 32 23051894784 0.0% 100.0% 0.0% flops 32 x 32 x 32 23051894784 0.0% 100.0% 0.0% flops 32 x 32 x 64 23051894784 0.0% 100.0% 0.0% flops 32 x 64 x 64 23051894784 0.0% 100.0% 0.0% flops 32 x 64 x 32 23051894784 0.0% 100.0% 0.0% flops 64 x 32 x 64 23051894784 0.0% 100.0% 0.0% flops 64 x 32 x 32 23051894784 0.0% 100.0% 0.0% flops 64 x 64 x 422 85181071360 0.0% 100.0% 0.0% flops 32 x 32 x 422 85181071360 0.0% 100.0% 0.0% flops 32 x 64 x 422 85181071360 0.0% 100.0% 0.0% flops 64 x 32 x 422 85181071360 0.0% 100.0% 0.0% flops 64 x 64 x 427 86190325760 0.0% 100.0% 0.0% flops 32 x 32 x 427 86190325760 0.0% 100.0% 0.0% flops 32 x 64 x 427 86190325760 0.0% 100.0% 0.0% flops 64 x 32 x 427 86190325760 0.0% 100.0% 0.0% flops 64 x 64 x 431 86997729280 0.0% 100.0% 0.0% flops 32 x 32 x 431 86997729280 0.0% 100.0% 0.0% flops 32 x 64 x 431 86997729280 0.0% 100.0% 0.0% flops 64 x 32 x 431 86997729280 0.0% 100.0% 0.0% flops 422 x 32 x 32 104651030528 0.0% 100.0% 0.0% flops 422 x 32 x 64 104651030528 0.0% 100.0% 0.0% flops 422 x 64 x 64 104651030528 0.0% 100.0% 0.0% flops 422 x 64 x 32 104651030528 0.0% 100.0% 0.0% flops 427 x 32 x 32 105890971648 0.0% 100.0% 0.0% flops 427 x 32 x 64 105890971648 0.0% 100.0% 0.0% flops 427 x 64 x 64 105890971648 0.0% 100.0% 0.0% flops 427 x 64 x 32 105890971648 0.0% 100.0% 0.0% flops 431 x 64 x 64 106882924544 0.0% 100.0% 0.0% flops 431 x 64 x 32 106882924544 0.0% 100.0% 0.0% flops 431 x 32 x 32 106882924544 0.0% 100.0% 0.0% flops 431 x 32 x 64 106882924544 0.0% 100.0% 0.0% flops 9 x 9 x 64 134590242816 0.0% 100.0% 0.0% flops 9 x 9 x 32 134590242816 0.0% 100.0% 0.0% flops 422 x 32 x 422 160475054080 0.0% 100.0% 0.0% flops 422 x 64 x 422 160475054080 0.0% 100.0% 0.0% flops 427 x 32 x 422 162376417280 0.0% 100.0% 0.0% flops 427 x 64 x 422 162376417280 0.0% 100.0% 0.0% flops 422 x 32 x 427 162376417280 0.0% 100.0% 0.0% flops 422 x 64 x 427 162376417280 0.0% 100.0% 0.0% flops 431 x 64 x 422 163897507840 0.0% 100.0% 0.0% flops 422 x 32 x 431 163897507840 0.0% 100.0% 0.0% flops 422 x 64 x 431 163897507840 0.0% 100.0% 0.0% flops 431 x 32 x 422 163897507840 0.0% 100.0% 0.0% flops 427 x 32 x 427 164300308480 0.0% 100.0% 0.0% flops 427 x 64 x 427 164300308480 0.0% 100.0% 0.0% flops 431 x 64 x 427 165839421440 0.0% 100.0% 0.0% flops 427 x 32 x 431 165839421440 0.0% 100.0% 0.0% flops 427 x 64 x 431 165839421440 0.0% 100.0% 0.0% flops 431 x 32 x 427 165839421440 0.0% 100.0% 0.0% flops 431 x 64 x 431 167392952320 0.0% 100.0% 0.0% flops 431 x 32 x 431 167392952320 0.0% 100.0% 0.0% flops 9 x 22 x 64 174697712640 0.0% 100.0% 0.0% flops 9 x 22 x 32 174697712640 0.0% 100.0% 0.0% flops 22 x 9 x 64 175021203456 0.0% 100.0% 0.0% flops 22 x 9 x 32 175021203456 0.0% 100.0% 0.0% flops 22 x 22 x 64 226790907904 0.0% 100.0% 0.0% flops 22 x 22 x 32 226790907904 0.0% 100.0% 0.0% flops 9 x 32 x 9 1138002296832 0.0% 100.0% 0.0% flops 22 x 32 x 9 1485592289280 0.0% 100.0% 0.0% flops 9 x 32 x 22 1485592289280 0.0% 100.0% 0.0% flops 22 x 32 x 22 1910442074112 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 12.884056E+12 0.0% 100.0% 0.0% flops max/rank 137.578432E+09 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 609915708 0.0% 100.0% 0.0% number of processed stacks 5472840 0.0% 100.0% 0.0% average stack size 0.0 111.4 0.0 marketing flops 15.646547E+12 ------------------------------------------------------------------------------- # multiplications 2055 max memory usage/rank 212.189184E+06 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 6510240 MPI messages size (bytes): total size 1.243517E+12 min size 0.000000E+00 max size 1.486088E+06 average size 191.009359E+03 MPI breakdown and total messages size (bytes): size <= 128 50820 0 128 < size <= 8192 1301256 10659889152 8192 < size <= 32768 1625844 31963807744 32768 < size <= 131072 1967328 214884679680 131072 < size <= 4194304 1564992 985951934528 4194304 < size <= 16777216 0 0 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 65 12. MP_Allreduce 11033 25. MP_Alltoall 8043 61599. MP_ISend 98596 100033. MP_IRecv 98596 98808. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3521 65382. MP_Allreduce 9921 489. MP_Sync 492 MP_Alltoall 1939 944079. MP_SendRecv 31460 6552. MP_ISendRecv 31460 6552. MP_Wait 47872 MP_comm_split 48 MP_ISend 26114 37861. MP_IRecv 26114 37861. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.011 0.034 52.535 52.536 qs_mol_dyn_low 1 2.0 0.004 0.004 52.255 52.267 qs_forces 11 3.9 0.003 0.004 52.139 52.153 qs_energies 11 4.9 0.001 0.002 49.377 49.397 scf_env_do_scf 11 5.9 0.000 0.001 44.479 44.480 scf_env_do_scf_inner_loop 99 6.5 0.002 0.007 38.630 38.632 velocity_verlet 10 3.0 0.001 0.002 30.170 30.170 dbcsr_multiply_generic 2055 12.4 0.107 0.149 19.084 19.765 qs_scf_new_mos 99 7.5 0.001 0.001 17.307 17.712 qs_scf_loop_do_ot 99 8.5 0.001 0.001 17.306 17.712 ot_scf_mini 99 9.5 0.002 0.003 16.159 16.507 rebuild_ks_matrix 110 8.3 0.000 0.001 15.504 15.899 qs_ks_build_kohn_sham_matrix 110 9.3 0.012 0.015 15.503 15.898 multiply_cannon 2055 13.4 0.168 0.199 12.156 14.605 qs_ks_update_qs_env 110 7.6 0.001 0.001 13.678 14.036 multiply_cannon_loop 2055 14.4 0.175 0.281 10.874 12.668 sum_up_and_integrate 110 10.3 0.001 0.002 10.522 10.548 integrate_v_rspace 110 11.3 0.003 0.004 10.503 10.527 qs_rho_update_rho_low 110 7.6 0.001 0.001 10.254 10.286 calculate_rho_elec 110 8.6 0.018 0.031 10.254 10.286 mp_waitall_1 264348 16.4 7.184 9.656 7.184 9.656 ot_mini 99 10.5 0.001 0.001 8.856 9.239 multiply_cannon_multrec 24660 15.4 5.849 8.855 5.858 8.864 qs_ot_get_derivative 99 11.5 0.001 0.001 5.740 6.087 init_scf_loop 11 6.9 0.000 0.000 5.809 5.811 grid_integrate_task_list 110 12.3 5.131 5.325 5.131 5.325 make_m2s 4110 13.4 0.072 0.095 4.733 5.003 grid_collocate_task_list 110 9.6 4.587 4.817 4.587 4.817 multiply_cannon_metrocomm3 24660 15.4 0.075 0.157 1.904 4.794 density_rs2pw 110 9.6 0.005 0.008 4.546 4.778 make_images 4110 14.4 0.158 0.207 4.243 4.492 qs_ot_get_p 110 10.4 0.001 0.001 3.715 4.190 potential_pw2rs 110 12.3 0.005 0.008 4.089 4.121 fft_wrap_pw1pw2 1111 11.6 0.012 0.015 3.802 4.005 prepare_preconditioner 11 7.9 0.000 0.000 3.774 3.822 make_preconditioner 11 8.9 0.000 0.000 3.774 3.821 init_scf_run 11 5.9 0.000 0.001 3.595 3.595 scf_env_initial_rho_setup 11 6.9 0.000 0.001 3.594 3.595 mp_waitany 13684 13.7 3.127 3.576 3.127 3.576 fft3d_ps 1111 13.6 0.684 0.957 3.372 3.539 make_full_inverse_cholesky 11 9.9 0.000 0.000 3.437 3.508 multiply_cannon_metrocomm1 24660 15.4 0.085 0.173 2.202 3.400 fft_wrap_pw1pw2_140 451 12.1 0.097 0.106 3.049 3.278 apply_preconditioner_dbcsr 110 12.6 0.000 0.000 2.834 3.190 apply_single 110 13.6 0.000 0.001 2.834 3.190 mp_alltoall_d11v 2046 13.8 2.871 3.099 2.871 3.099 make_images_data 4110 15.4 0.053 0.082 2.571 3.063 ot_diis_step 99 11.5 0.006 0.009 3.061 3.061 transfer_pw2rs 451 13.1 0.005 0.007 3.018 3.042 wfi_extrapolate 11 7.9 0.001 0.001 3.025 3.025 transfer_rs2pw 451 10.6 0.005 0.007 2.722 2.935 hybrid_alltoall_any 4261 16.3 0.077 0.472 2.043 2.823 mp_sum_l 10179 13.1 1.605 2.714 1.605 2.714 qs_ot_get_derivative_diag 47 12.0 0.001 0.002 2.037 2.223 mp_alltoall_z22v 1111 15.6 1.945 2.036 1.945 2.036 qs_ot_p2m_diag 48 11.0 0.008 0.017 1.973 1.996 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 1.954 1.994 mp_allgather_i34 2055 14.4 0.629 1.953 0.629 1.953 qs_ot_get_derivative_taylor 52 13.0 0.001 0.002 1.677 1.842 transfer_pw2rs_50 110 14.3 0.089 0.105 1.545 1.838 cp_dbcsr_syevd 48 12.0 0.003 0.004 1.750 1.754 transfer_rs2pw_50 110 11.6 0.128 0.145 1.496 1.537 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 1.518 1.530 mp_sum_d 3893 11.9 1.065 1.524 1.065 1.524 cp_fm_cholesky_decompose 22 10.9 1.489 1.511 1.489 1.511 rs_gather_matrices 110 12.3 0.096 0.129 1.176 1.443 make_images_sizes 4110 15.4 0.005 0.011 0.910 1.442 mp_alltoall_i44 4110 16.4 0.905 1.438 0.905 1.438 cp_fm_cholesky_invert 11 10.9 1.410 1.417 1.410 1.417 calculate_dm_sparse 110 9.5 0.001 0.001 1.310 1.394 dbcsr_complete_redistribute 325 12.2 0.150 0.183 1.222 1.343 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 1.089 1.224 multiply_cannon_metrocomm4 22605 15.4 0.071 0.154 0.499 1.172 yz_to_x 231 14.8 0.064 0.070 1.121 1.164 cp_fm_diag_elpa 48 13.0 0.000 0.000 1.155 1.157 mp_irecv_dv 59576 16.0 0.480 1.142 0.480 1.142 transfer_pw2rs_140 121 13.9 0.370 0.476 0.998 1.129 rs_scatter_matrices 121 9.7 0.085 0.108 1.093 1.126 cp_fm_redistribute_end 48 14.0 0.568 1.119 0.577 1.124 cp_fm_diag_elpa_base 48 14.0 0.538 1.080 0.544 1.091 transfer_rs2pw_140 121 11.5 0.255 0.297 0.838 1.067 copy_dbcsr_to_fm 151 11.3 0.002 0.003 0.902 1.063 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="200", plot="h2o_128_md", label="(4n/36r/1t)", y=52.536000, yerr=0.000000 PlotPoint: name="201", plot="h2o_128_md_mem", label="(4n/36r/1t)", y=201.636364, yerr=1.871933 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/13/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 830472192 0.0% 100.0% 0.0% flops 22 x 32 x 32 1015021568 0.0% 100.0% 0.0% flops 360 x 32 x 422 8556134400 0.0% 100.0% 0.0% flops 360 x 64 x 422 8556134400 0.0% 100.0% 0.0% flops 378 x 32 x 422 8983941120 0.0% 100.0% 0.0% flops 378 x 64 x 422 8983941120 0.0% 100.0% 0.0% flops 382 x 64 x 422 9079009280 0.0% 100.0% 0.0% flops 382 x 32 x 422 9079009280 0.0% 100.0% 0.0% flops 458 x 32 x 422 10885304320 0.0% 100.0% 0.0% flops 458 x 64 x 422 10885304320 0.0% 100.0% 0.0% flops 471 x 32 x 422 11194275840 0.0% 100.0% 0.0% flops 471 x 64 x 422 11194275840 0.0% 100.0% 0.0% flops 480 x 64 x 422 11408179200 0.0% 100.0% 0.0% flops 480 x 32 x 422 11408179200 0.0% 100.0% 0.0% flops 493 x 32 x 422 11717150720 0.0% 100.0% 0.0% flops 493 x 64 x 422 11717150720 0.0% 100.0% 0.0% flops 32 x 64 x 422 21295267840 0.0% 100.0% 0.0% flops 64 x 64 x 422 21295267840 0.0% 100.0% 0.0% flops 32 x 32 x 422 21295267840 0.0% 100.0% 0.0% flops 64 x 32 x 422 21295267840 0.0% 100.0% 0.0% flops 449 x 32 x 422 21342801920 0.0% 100.0% 0.0% flops 449 x 64 x 422 21342801920 0.0% 100.0% 0.0% flops 360 x 32 x 32 22318940160 0.0% 100.0% 0.0% flops 360 x 32 x 64 22318940160 0.0% 100.0% 0.0% flops 360 x 64 x 32 22318940160 0.0% 100.0% 0.0% flops 360 x 64 x 64 22318940160 0.0% 100.0% 0.0% flops 32 x 64 x 64 23051894784 0.0% 100.0% 0.0% flops 32 x 64 x 32 23051894784 0.0% 100.0% 0.0% flops 64 x 64 x 64 23051894784 0.0% 100.0% 0.0% flops 64 x 64 x 32 23051894784 0.0% 100.0% 0.0% flops 32 x 32 x 32 23051894784 0.0% 100.0% 0.0% flops 32 x 32 x 64 23051894784 0.0% 100.0% 0.0% flops 64 x 32 x 32 23051894784 0.0% 100.0% 0.0% flops 64 x 32 x 64 23051894784 0.0% 100.0% 0.0% flops 378 x 32 x 32 23434887168 0.0% 100.0% 0.0% flops 378 x 32 x 64 23434887168 0.0% 100.0% 0.0% flops 378 x 64 x 32 23434887168 0.0% 100.0% 0.0% flops 378 x 64 x 64 23434887168 0.0% 100.0% 0.0% flops 382 x 64 x 64 23682875392 0.0% 100.0% 0.0% flops 382 x 64 x 32 23682875392 0.0% 100.0% 0.0% flops 382 x 32 x 32 23682875392 0.0% 100.0% 0.0% flops 382 x 32 x 64 23682875392 0.0% 100.0% 0.0% flops 458 x 32 x 32 28394651648 0.0% 100.0% 0.0% flops 458 x 32 x 64 28394651648 0.0% 100.0% 0.0% flops 458 x 64 x 32 28394651648 0.0% 100.0% 0.0% flops 458 x 64 x 64 28394651648 0.0% 100.0% 0.0% flops 400 x 32 x 422 28520448000 0.0% 100.0% 0.0% flops 400 x 64 x 422 28520448000 0.0% 100.0% 0.0% flops 471 x 32 x 32 29200613376 0.0% 100.0% 0.0% flops 471 x 32 x 64 29200613376 0.0% 100.0% 0.0% flops 471 x 64 x 32 29200613376 0.0% 100.0% 0.0% flops 471 x 64 x 64 29200613376 0.0% 100.0% 0.0% flops 480 x 64 x 64 29758586880 0.0% 100.0% 0.0% flops 480 x 64 x 32 29758586880 0.0% 100.0% 0.0% flops 480 x 32 x 32 29758586880 0.0% 100.0% 0.0% flops 480 x 32 x 64 29758586880 0.0% 100.0% 0.0% flops 493 x 32 x 32 30564548608 0.0% 100.0% 0.0% flops 493 x 32 x 64 30564548608 0.0% 100.0% 0.0% flops 493 x 64 x 32 30564548608 0.0% 100.0% 0.0% flops 493 x 64 x 64 30564548608 0.0% 100.0% 0.0% flops 360 x 32 x 418 33900134400 0.0% 100.0% 0.0% flops 360 x 64 x 418 33900134400 0.0% 100.0% 0.0% flops 378 x 32 x 418 35595141120 0.0% 100.0% 0.0% flops 378 x 64 x 418 35595141120 0.0% 100.0% 0.0% flops 382 x 64 x 418 35971809280 0.0% 100.0% 0.0% flops 382 x 32 x 418 35971809280 0.0% 100.0% 0.0% flops 458 x 32 x 418 43128504320 0.0% 100.0% 0.0% flops 458 x 64 x 418 43128504320 0.0% 100.0% 0.0% flops 471 x 32 x 418 44352675840 0.0% 100.0% 0.0% flops 471 x 64 x 418 44352675840 0.0% 100.0% 0.0% flops 480 x 64 x 418 45200179200 0.0% 100.0% 0.0% flops 480 x 32 x 418 45200179200 0.0% 100.0% 0.0% flops 493 x 32 x 418 46424350720 0.0% 100.0% 0.0% flops 493 x 64 x 418 46424350720 0.0% 100.0% 0.0% flops 360 x 32 x 431 52431667200 0.0% 100.0% 0.0% flops 360 x 64 x 431 52431667200 0.0% 100.0% 0.0% flops 378 x 32 x 431 55053250560 0.0% 100.0% 0.0% flops 378 x 64 x 431 55053250560 0.0% 100.0% 0.0% flops 382 x 64 x 431 55635824640 0.0% 100.0% 0.0% flops 382 x 32 x 431 55635824640 0.0% 100.0% 0.0% flops 449 x 32 x 32 55673356288 0.0% 100.0% 0.0% flops 449 x 32 x 64 55673356288 0.0% 100.0% 0.0% flops 449 x 64 x 32 55673356288 0.0% 100.0% 0.0% flops 449 x 64 x 64 55673356288 0.0% 100.0% 0.0% flops 458 x 32 x 431 66704732160 0.0% 100.0% 0.0% flops 458 x 64 x 431 66704732160 0.0% 100.0% 0.0% flops 471 x 32 x 431 68598097920 0.0% 100.0% 0.0% flops 471 x 64 x 431 68598097920 0.0% 100.0% 0.0% flops 480 x 64 x 431 69908889600 0.0% 100.0% 0.0% flops 480 x 32 x 431 69908889600 0.0% 100.0% 0.0% flops 493 x 32 x 431 71802255360 0.0% 100.0% 0.0% flops 493 x 64 x 431 71802255360 0.0% 100.0% 0.0% flops 400 x 32 x 32 74396467200 0.0% 100.0% 0.0% flops 400 x 32 x 64 74396467200 0.0% 100.0% 0.0% flops 400 x 64 x 32 74396467200 0.0% 100.0% 0.0% flops 400 x 64 x 64 74396467200 0.0% 100.0% 0.0% flops 32 x 64 x 418 84373667840 0.0% 100.0% 0.0% flops 64 x 64 x 418 84373667840 0.0% 100.0% 0.0% flops 32 x 32 x 418 84373667840 0.0% 100.0% 0.0% flops 64 x 32 x 418 84373667840 0.0% 100.0% 0.0% flops 449 x 32 x 418 84562001920 0.0% 100.0% 0.0% flops 449 x 64 x 418 84562001920 0.0% 100.0% 0.0% flops 400 x 32 x 418 113000448000 0.0% 100.0% 0.0% flops 400 x 64 x 418 113000448000 0.0% 100.0% 0.0% flops 32 x 64 x 431 130496593920 0.0% 100.0% 0.0% flops 64 x 64 x 431 130496593920 0.0% 100.0% 0.0% flops 32 x 32 x 431 130496593920 0.0% 100.0% 0.0% flops 64 x 32 x 431 130496593920 0.0% 100.0% 0.0% flops 449 x 32 x 431 130787880960 0.0% 100.0% 0.0% flops 449 x 64 x 431 130787880960 0.0% 100.0% 0.0% flops 9 x 9 x 64 134590242816 0.0% 100.0% 0.0% flops 9 x 9 x 32 134590242816 0.0% 100.0% 0.0% flops 9 x 22 x 64 174697712640 0.0% 100.0% 0.0% flops 9 x 22 x 32 174697712640 0.0% 100.0% 0.0% flops 400 x 32 x 431 174772224000 0.0% 100.0% 0.0% flops 400 x 64 x 431 174772224000 0.0% 100.0% 0.0% flops 22 x 9 x 64 175021203456 0.0% 100.0% 0.0% flops 22 x 9 x 32 175021203456 0.0% 100.0% 0.0% flops 22 x 22 x 64 226790907904 0.0% 100.0% 0.0% flops 22 x 22 x 32 226790907904 0.0% 100.0% 0.0% flops 9 x 32 x 9 1138002296832 0.0% 100.0% 0.0% flops 22 x 32 x 9 1485592289280 0.0% 100.0% 0.0% flops 9 x 32 x 22 1485592289280 0.0% 100.0% 0.0% flops 22 x 32 x 22 1910442074112 0.0% 100.0% 0.0% flops inhomo. stacks 388562944000 100.0% 0.0% 0.0% flops total 12.930049E+12 3.0% 97.0% 0.0% flops max/rank 265.896255E+09 3.3% 96.7% 0.0% matmuls inhomo. stacks 71280 100.0% 0.0% 0.0% matmuls total 609915708 0.0% 100.0% 0.0% number of processed stacks 5475744 1.3% 98.7% 0.0% average stack size 1.0 112.8 0.0 marketing flops 15.646547E+12 ------------------------------------------------------------------------------- # multiplications 2055 max memory usage/rank 258.916352E+06 # max total images/rank 2 # max 3D layers 1 # MPI messages exchanged 3107160 MPI messages size (bytes): total size 1.118966E+12 min size 0.000000E+00 max size 3.034240E+06 average size 360.125031E+03 MPI breakdown and total messages size (bytes): size <= 128 23892 0 128 < size <= 8192 371640 3044474880 8192 < size <= 32768 887412 19031326720 32768 < size <= 131072 539952 51713409024 131072 < size <= 4194304 1284264 1045217450048 4194304 < size <= 16777216 0 0 16777216 < size 0 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3521 65473. MP_Allreduce 9920 520. MP_Sync 52 MP_Alltoall 1938 1990983. MP_SendRecv 15620 11120. MP_ISendRecv 15620 11120. MP_Wait 31988 MP_comm_split 48 MP_ISend 14300 93624. MP_IRecv 14300 93624. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.040 0.063 77.065 77.066 qs_mol_dyn_low 1 2.0 0.008 0.019 76.660 76.670 qs_forces 11 3.9 0.003 0.004 76.510 76.546 qs_energies 11 4.9 0.002 0.003 72.292 72.336 scf_env_do_scf 11 5.9 0.001 0.003 65.279 65.281 scf_env_do_scf_inner_loop 99 6.5 0.003 0.034 56.953 56.968 velocity_verlet 10 3.0 0.002 0.005 44.824 44.826 dbcsr_multiply_generic 2055 12.4 0.152 0.161 31.301 32.327 qs_scf_new_mos 99 7.5 0.001 0.002 26.838 27.524 qs_scf_loop_do_ot 99 8.5 0.001 0.001 26.837 27.523 ot_scf_mini 99 9.5 0.003 0.004 25.132 25.677 multiply_cannon 2055 13.4 0.234 0.265 21.502 24.484 rebuild_ks_matrix 110 8.3 0.001 0.001 22.194 22.832 qs_ks_build_kohn_sham_matrix 110 9.3 0.015 0.020 22.193 22.832 multiply_cannon_loop 2055 14.4 0.302 0.323 19.466 21.772 qs_ks_update_qs_env 110 7.6 0.001 0.001 19.609 20.175 multiply_cannon_multrec 24660 15.4 14.071 19.229 14.090 19.249 ot_mini 99 10.5 0.001 0.001 14.102 14.702 qs_rho_update_rho_low 110 7.6 0.001 0.001 14.085 14.115 calculate_rho_elec 110 8.6 0.034 0.045 14.085 14.114 sum_up_and_integrate 110 10.3 0.002 0.005 13.771 13.795 integrate_v_rspace 110 11.3 0.003 0.004 13.724 13.752 mp_waitall_1 198528 16.4 6.579 12.887 6.579 12.887 multiply_cannon_metrocomm3 24660 15.4 0.098 0.112 2.929 9.499 grid_collocate_task_list 110 9.6 8.776 9.114 8.776 9.114 grid_integrate_task_list 110 12.3 8.534 9.055 8.534 9.055 init_scf_loop 11 6.9 0.000 0.001 8.277 8.289 qs_ot_get_derivative 99 11.5 0.001 0.001 7.346 7.900 apply_preconditioner_dbcsr 110 12.6 0.000 0.000 6.376 6.950 apply_single 110 13.6 0.001 0.001 6.376 6.950 make_m2s 4110 13.4 0.098 0.104 6.473 6.735 ot_diis_step 99 11.5 0.012 0.013 6.684 6.685 make_images 4110 14.4 0.470 0.554 5.510 5.883 qs_ot_get_p 110 10.4 0.001 0.001 4.718 5.414 prepare_preconditioner 11 7.9 0.000 0.000 5.198 5.271 make_preconditioner 11 8.9 0.000 0.000 5.198 5.271 init_scf_run 11 5.9 0.000 0.007 5.093 5.093 scf_env_initial_rho_setup 11 6.9 0.001 0.007 5.092 5.093 make_full_inverse_cholesky 11 9.9 0.000 0.000 4.788 4.949 fft_wrap_pw1pw2 1111 11.6 0.019 0.027 4.754 4.909 density_rs2pw 110 9.6 0.007 0.009 4.260 4.623 wfi_extrapolate 11 7.9 0.001 0.002 4.461 4.461 fft_wrap_pw1pw2_140 451 12.1 0.189 0.197 4.100 4.302 fft3d_ps 1111 13.6 1.440 1.750 3.883 4.070 mp_sum_l 10179 13.1 2.266 4.007 2.266 4.007 multiply_cannon_metrocomm4 22605 15.4 0.100 0.112 1.418 3.875 potential_pw2rs 110 12.3 0.010 0.011 3.787 3.818 mp_irecv_dv 61724 16.0 1.327 3.810 1.327 3.810 make_images_data 4110 15.4 0.066 0.072 3.222 3.673 hybrid_alltoall_any 4261 16.3 0.139 0.651 2.738 3.460 mp_alltoall_d11v 2046 13.8 2.831 3.171 2.831 3.171 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.838 2.910 qs_ot_get_derivative_diag 47 12.0 0.001 0.002 2.454 2.748 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.003 2.535 2.541 transfer_pw2rs 451 13.1 0.007 0.011 2.475 2.488 qs_ot_p2m_diag 48 11.0 0.018 0.034 2.429 2.456 qs_ot_get_derivative_taylor 52 13.0 0.002 0.002 2.111 2.374 mp_waitany 14300 13.8 1.745 2.334 1.745 2.334 cp_fm_cholesky_invert 11 10.9 2.280 2.292 2.280 2.292 transfer_rs2pw 451 10.6 0.007 0.011 1.944 2.237 cp_dbcsr_syevd 48 12.0 0.004 0.004 2.198 2.204 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 1.949 2.156 mp_alltoall_z22v 1111 15.6 1.986 2.151 1.986 2.151 calculate_dm_sparse 110 9.5 0.001 0.001 1.942 2.119 mp_sum_d 3893 11.9 1.339 2.074 1.339 2.074 mp_allgather_i34 2055 14.4 0.550 1.813 0.550 1.813 dbcsr_dot_sd 1091 11.9 0.316 0.351 1.117 1.734 dbcsr_complete_redistribute 325 12.2 0.245 0.279 1.432 1.613 rs_gather_matrices 110 12.3 0.171 0.188 1.297 1.604 transfer_pw2rs_140 121 13.9 0.493 0.564 1.506 1.601 cp_fm_cholesky_decompose 22 10.9 1.552 1.592 1.552 1.592 cp_fm_diag_elpa 48 13.0 0.000 0.000 1.554 1.555 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="202", plot="h2o_128_md", label="(4n/18r/2t)", y=77.066000, yerr=0.000000 PlotPoint: name="203", plot="h2o_128_md_mem", label="(4n/18r/2t)", y=245.090909, yerr=2.193152 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/14/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 830472192 0.0% 100.0% 0.0% flops 22 x 32 x 32 1015021568 0.0% 100.0% 0.0% flops 436 x 64 x 409 10043207680 0.0% 100.0% 0.0% flops 436 x 32 x 409 10043207680 0.0% 100.0% 0.0% flops 440 x 32 x 409 10135347200 0.0% 100.0% 0.0% flops 440 x 64 x 409 10135347200 0.0% 100.0% 0.0% flops 449 x 32 x 409 10342661120 0.0% 100.0% 0.0% flops 449 x 64 x 409 10342661120 0.0% 100.0% 0.0% flops 458 x 32 x 409 10549975040 0.0% 100.0% 0.0% flops 458 x 64 x 409 10549975040 0.0% 100.0% 0.0% flops 462 x 32 x 409 10642114560 0.0% 100.0% 0.0% flops 462 x 64 x 409 10642114560 0.0% 100.0% 0.0% flops 493 x 64 x 409 11356195840 0.0% 100.0% 0.0% flops 493 x 32 x 409 11356195840 0.0% 100.0% 0.0% flops 32 x 64 x 409 20639252480 0.0% 100.0% 0.0% flops 64 x 64 x 409 20639252480 0.0% 100.0% 0.0% flops 32 x 32 x 409 20639252480 0.0% 100.0% 0.0% flops 64 x 32 x 409 20639252480 0.0% 100.0% 0.0% flops 471 x 32 x 409 21698856960 0.0% 100.0% 0.0% flops 471 x 64 x 409 21698856960 0.0% 100.0% 0.0% flops 32 x 64 x 64 23051894784 0.0% 100.0% 0.0% flops 32 x 64 x 32 23051894784 0.0% 100.0% 0.0% flops 64 x 64 x 64 23051894784 0.0% 100.0% 0.0% flops 64 x 64 x 32 23051894784 0.0% 100.0% 0.0% flops 32 x 32 x 32 23051894784 0.0% 100.0% 0.0% flops 32 x 32 x 64 23051894784 0.0% 100.0% 0.0% flops 64 x 32 x 32 23051894784 0.0% 100.0% 0.0% flops 64 x 32 x 64 23051894784 0.0% 100.0% 0.0% flops 436 x 64 x 64 27030716416 0.0% 100.0% 0.0% flops 436 x 64 x 32 27030716416 0.0% 100.0% 0.0% flops 436 x 32 x 32 27030716416 0.0% 100.0% 0.0% flops 436 x 32 x 64 27030716416 0.0% 100.0% 0.0% flops 440 x 32 x 32 27278704640 0.0% 100.0% 0.0% flops 440 x 32 x 64 27278704640 0.0% 100.0% 0.0% flops 440 x 64 x 64 27278704640 0.0% 100.0% 0.0% flops 440 x 64 x 32 27278704640 0.0% 100.0% 0.0% flops 449 x 32 x 32 27836678144 0.0% 100.0% 0.0% flops 449 x 32 x 64 27836678144 0.0% 100.0% 0.0% flops 449 x 64 x 64 27836678144 0.0% 100.0% 0.0% flops 449 x 64 x 32 27836678144 0.0% 100.0% 0.0% flops 458 x 32 x 32 28394651648 0.0% 100.0% 0.0% flops 458 x 32 x 64 28394651648 0.0% 100.0% 0.0% flops 458 x 64 x 64 28394651648 0.0% 100.0% 0.0% flops 458 x 64 x 32 28394651648 0.0% 100.0% 0.0% flops 462 x 32 x 32 28642639872 0.0% 100.0% 0.0% flops 462 x 32 x 64 28642639872 0.0% 100.0% 0.0% flops 462 x 64 x 64 28642639872 0.0% 100.0% 0.0% flops 462 x 64 x 32 28642639872 0.0% 100.0% 0.0% flops 493 x 64 x 64 30564548608 0.0% 100.0% 0.0% flops 493 x 64 x 32 30564548608 0.0% 100.0% 0.0% flops 493 x 32 x 32 30564548608 0.0% 100.0% 0.0% flops 493 x 32 x 64 30564548608 0.0% 100.0% 0.0% flops 436 x 64 x 418 30792622080 0.0% 100.0% 0.0% flops 436 x 32 x 418 30792622080 0.0% 100.0% 0.0% flops 440 x 32 x 418 31075123200 0.0% 100.0% 0.0% flops 440 x 64 x 418 31075123200 0.0% 100.0% 0.0% flops 449 x 32 x 418 31710750720 0.0% 100.0% 0.0% flops 449 x 64 x 418 31710750720 0.0% 100.0% 0.0% flops 458 x 32 x 418 32346378240 0.0% 100.0% 0.0% flops 458 x 64 x 418 32346378240 0.0% 100.0% 0.0% flops 462 x 32 x 418 32628879360 0.0% 100.0% 0.0% flops 462 x 64 x 418 32628879360 0.0% 100.0% 0.0% flops 360 x 64 x 409 33170227200 0.0% 100.0% 0.0% flops 360 x 32 x 409 33170227200 0.0% 100.0% 0.0% flops 493 x 64 x 418 34818263040 0.0% 100.0% 0.0% flops 493 x 32 x 418 34818263040 0.0% 100.0% 0.0% flops 471 x 32 x 32 58401226752 0.0% 100.0% 0.0% flops 471 x 32 x 64 58401226752 0.0% 100.0% 0.0% flops 471 x 64 x 64 58401226752 0.0% 100.0% 0.0% flops 471 x 64 x 32 58401226752 0.0% 100.0% 0.0% flops 32 x 64 x 418 63280250880 0.0% 100.0% 0.0% flops 64 x 64 x 418 63280250880 0.0% 100.0% 0.0% flops 32 x 32 x 418 63280250880 0.0% 100.0% 0.0% flops 64 x 32 x 418 63280250880 0.0% 100.0% 0.0% flops 471 x 32 x 418 66529013760 0.0% 100.0% 0.0% flops 471 x 64 x 418 66529013760 0.0% 100.0% 0.0% flops 436 x 64 x 431 74084003840 0.0% 100.0% 0.0% flops 436 x 32 x 431 74084003840 0.0% 100.0% 0.0% flops 440 x 32 x 431 74763673600 0.0% 100.0% 0.0% flops 440 x 64 x 431 74763673600 0.0% 100.0% 0.0% flops 449 x 32 x 431 76292930560 0.0% 100.0% 0.0% flops 449 x 64 x 431 76292930560 0.0% 100.0% 0.0% flops 458 x 32 x 431 77822187520 0.0% 100.0% 0.0% flops 458 x 64 x 431 77822187520 0.0% 100.0% 0.0% flops 462 x 32 x 431 78501857280 0.0% 100.0% 0.0% flops 462 x 64 x 431 78501857280 0.0% 100.0% 0.0% flops 493 x 64 x 431 83769297920 0.0% 100.0% 0.0% flops 493 x 32 x 431 83769297920 0.0% 100.0% 0.0% flops 360 x 64 x 64 89275760640 0.0% 100.0% 0.0% flops 360 x 64 x 32 89275760640 0.0% 100.0% 0.0% flops 360 x 32 x 32 89275760640 0.0% 100.0% 0.0% flops 360 x 32 x 64 89275760640 0.0% 100.0% 0.0% flops 360 x 64 x 418 101700403200 0.0% 100.0% 0.0% flops 360 x 32 x 418 101700403200 0.0% 100.0% 0.0% flops 9 x 9 x 64 134590242816 0.0% 100.0% 0.0% flops 9 x 9 x 32 134590242816 0.0% 100.0% 0.0% flops 32 x 64 x 431 152246026240 0.0% 100.0% 0.0% flops 64 x 64 x 431 152246026240 0.0% 100.0% 0.0% flops 32 x 32 x 431 152246026240 0.0% 100.0% 0.0% flops 64 x 32 x 431 152246026240 0.0% 100.0% 0.0% flops 471 x 32 x 431 160062228480 0.0% 100.0% 0.0% flops 471 x 64 x 431 160062228480 0.0% 100.0% 0.0% flops 9 x 22 x 64 174697712640 0.0% 100.0% 0.0% flops 9 x 22 x 32 174697712640 0.0% 100.0% 0.0% flops 22 x 9 x 64 175021203456 0.0% 100.0% 0.0% flops 22 x 9 x 32 175021203456 0.0% 100.0% 0.0% flops 22 x 22 x 64 226790907904 0.0% 100.0% 0.0% flops 22 x 22 x 32 226790907904 0.0% 100.0% 0.0% flops 360 x 64 x 431 244681113600 0.0% 100.0% 0.0% flops 360 x 32 x 431 244681113600 0.0% 100.0% 0.0% flops 9 x 32 x 9 1138002296832 0.0% 100.0% 0.0% flops 22 x 32 x 9 1485592289280 0.0% 100.0% 0.0% flops 9 x 32 x 22 1485592289280 0.0% 100.0% 0.0% flops 22 x 32 x 22 1910442074112 0.0% 100.0% 0.0% flops inhomo. stacks 415276646400 100.0% 0.0% 0.0% flops total 12.956763E+12 3.2% 96.8% 0.0% flops max/rank 393.280399E+09 3.4% 96.6% 0.0% matmuls inhomo. stacks 71280 100.0% 0.0% 0.0% matmuls total 609915708 0.0% 100.0% 0.0% number of processed stacks 5475744 1.3% 98.7% 0.0% average stack size 1.0 112.8 0.0 marketing flops 15.646302E+12 ------------------------------------------------------------------------------- # multiplications 2055 max memory usage/rank 304.934912E+06 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 1972800 MPI messages size (bytes): total size 1.077520E+12 min size 0.000000E+00 max size 4.537280E+06 average size 546.188250E+03 MPI breakdown and total messages size (bytes): size <= 128 14916 0 128 < size <= 8192 222984 1826684928 8192 < size <= 32768 520356 13399818240 32768 < size <= 131072 372336 35386294272 131072 < size <= 4194304 787758 788321309808 4194304 < size <= 16777216 54450 238588003280 16777216 < size 0 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3521 65581. MP_Allreduce 9919 558. MP_Sync 52 MP_Alltoall 1717 2433540. MP_SendRecv 10340 26400. MP_ISendRecv 10340 26400. MP_Wait 22352 MP_comm_split 48 MP_ISend 10164 155761. MP_IRecv 10164 155761. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.016 0.041 73.136 73.197 qs_mol_dyn_low 1 2.0 0.011 0.020 72.371 72.850 qs_forces 11 3.9 0.003 0.003 72.034 72.105 qs_energies 11 4.9 0.001 0.002 68.387 68.452 scf_env_do_scf 11 5.9 0.000 0.002 61.767 61.818 scf_env_do_scf_inner_loop 99 6.5 0.003 0.023 53.106 53.153 velocity_verlet 10 3.0 0.003 0.006 42.667 42.710 dbcsr_multiply_generic 2055 12.4 0.152 0.207 31.316 32.465 qs_scf_new_mos 99 7.5 0.001 0.001 28.063 28.614 qs_scf_loop_do_ot 99 8.5 0.001 0.001 28.062 28.613 ot_scf_mini 99 9.5 0.003 0.004 26.313 26.821 multiply_cannon 2055 13.4 0.230 0.272 20.981 24.686 multiply_cannon_loop 2055 14.4 0.276 0.389 19.150 22.122 multiply_cannon_multrec 24660 15.4 12.575 19.930 12.594 19.949 rebuild_ks_matrix 110 8.3 0.001 0.001 18.648 19.332 qs_ks_build_kohn_sham_matrix 110 9.3 0.014 0.017 18.647 19.332 qs_ks_update_qs_env 110 7.6 0.001 0.001 16.482 17.104 mp_waitall_1 176588 16.5 7.499 15.523 7.499 15.523 ot_mini 99 10.5 0.001 0.001 14.455 15.001 multiply_cannon_metrocomm3 24660 15.4 0.097 0.195 4.180 11.764 qs_rho_update_rho_low 110 7.6 0.001 0.001 11.530 11.549 calculate_rho_elec 110 8.6 0.049 0.059 11.530 11.548 sum_up_and_integrate 110 10.3 0.001 0.003 11.185 11.240 integrate_v_rspace 110 11.3 0.003 0.004 11.156 11.225 qs_ot_get_derivative 99 11.5 0.001 0.002 8.134 8.645 init_scf_loop 11 6.9 0.000 0.000 8.613 8.620 grid_integrate_task_list 110 12.3 7.063 7.412 7.063 7.412 grid_collocate_task_list 110 9.6 6.884 7.320 6.884 7.320 make_m2s 4110 13.4 0.099 0.130 6.731 7.089 apply_preconditioner_dbcsr 110 12.6 0.000 0.001 5.822 6.312 apply_single 110 13.6 0.000 0.001 5.822 6.312 ot_diis_step 99 11.5 0.013 0.015 6.214 6.220 qs_ot_get_p 110 10.4 0.001 0.001 5.426 6.125 prepare_preconditioner 11 7.9 0.000 0.000 5.920 5.992 make_preconditioner 11 8.9 0.000 0.000 5.920 5.992 make_images 4110 14.4 0.464 0.542 5.727 5.978 make_full_inverse_cholesky 11 9.9 0.000 0.000 5.482 5.679 init_scf_run 11 5.9 0.000 0.003 4.698 4.701 scf_env_initial_rho_setup 11 6.9 0.001 0.003 4.697 4.701 density_rs2pw 110 9.6 0.005 0.009 3.762 4.594 fft_wrap_pw1pw2 1111 11.6 0.014 0.018 4.152 4.467 wfi_extrapolate 11 7.9 0.001 0.001 4.084 4.087 mp_sum_l 10179 13.1 2.564 4.060 2.564 4.060 fft3d_ps 1111 13.6 1.108 1.461 3.480 4.037 multiply_cannon_metrocomm4 22605 15.4 0.098 0.201 1.727 3.974 mp_irecv_dv 57340 16.2 1.597 3.831 1.597 3.831 fft_wrap_pw1pw2_140 451 12.1 0.140 0.167 3.537 3.734 make_images_data 4110 15.4 0.066 0.110 3.171 3.550 hybrid_alltoall_any 4261 16.3 0.117 0.434 2.684 3.122 qs_ot_get_derivative_diag 47 12.0 0.001 0.002 2.790 3.073 potential_pw2rs 110 12.3 0.010 0.014 2.983 3.061 mp_alltoall_d11v 2046 13.8 2.450 3.060 2.450 3.060 mp_alltoall_z22v 1111 15.6 2.104 2.915 2.104 2.915 mp_waitany 10164 13.8 1.782 2.866 1.782 2.866 qs_ot_p2m_diag 48 11.0 0.026 0.044 2.769 2.793 transfer_rs2pw 451 10.6 0.006 0.007 1.859 2.731 cp_fm_cholesky_invert 11 10.9 2.625 2.643 2.625 2.643 qs_ot_get_derivative_taylor 52 13.0 0.002 0.002 2.315 2.553 cp_dbcsr_syevd 48 12.0 0.003 0.004 2.458 2.465 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.385 2.447 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 2.385 2.389 mp_sum_d 3893 11.9 1.445 2.298 1.445 2.298 calculate_dm_sparse 110 9.5 0.001 0.001 1.976 2.094 transfer_rs2pw_140 121 11.5 0.227 0.268 1.198 2.059 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 1.821 2.045 transfer_pw2rs 451 13.1 0.005 0.007 1.831 1.848 cp_fm_diag_elpa 48 13.0 0.000 0.000 1.831 1.833 cp_fm_redistribute_end 48 14.0 0.908 1.799 0.914 1.802 cp_fm_diag_elpa_base 48 14.0 0.796 1.664 0.880 1.767 make_images_sizes 4110 15.4 0.006 0.013 1.176 1.762 mp_alltoall_i44 4110 16.4 1.170 1.757 1.170 1.757 dbcsr_complete_redistribute 325 12.2 0.277 0.322 1.436 1.712 dbcsr_dot_sd 1091 11.9 0.277 0.351 1.180 1.689 qs_ot_get_orbitals 99 10.5 0.001 0.001 1.587 1.683 yz_to_x 451 14.2 0.085 0.093 1.215 1.646 cp_fm_cholesky_decompose 22 10.9 1.568 1.635 1.568 1.635 mp_allgather_i34 2055 14.4 0.720 1.607 0.720 1.607 rs_gather_matrices 110 12.3 0.124 0.144 1.029 1.552 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="204", plot="h2o_128_md", label="(4n/12r/3t)", y=73.197000, yerr=0.000000 PlotPoint: name="205", plot="h2o_128_md_mem", label="(4n/12r/3t)", y=285.363636, yerr=3.170108 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/15/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 830472192 0.0% 100.0% 0.0% flops 22 x 32 x 32 1015021568 0.0% 100.0% 0.0% flops 64 x 64 x 64 1440743424 0.0% 100.0% 0.0% flops 151 x 64 x 64 2340388864 0.0% 100.0% 0.0% flops 173 x 64 x 64 2681372672 0.0% 100.0% 0.0% flops 178 x 64 x 64 2758868992 0.0% 100.0% 0.0% flops 271 x 64 x 64 4200300544 0.0% 100.0% 0.0% flops 64 x 96 x 64 4322230272 0.0% 100.0% 0.0% flops 64 x 64 x 96 4322230272 0.0% 100.0% 0.0% flops 151 x 64 x 96 7021166592 0.0% 100.0% 0.0% flops 151 x 96 x 64 7021166592 0.0% 100.0% 0.0% flops 151 x 64 x 849 7220167680 0.0% 100.0% 0.0% flops 151 x 64 x 853 7254184960 0.0% 100.0% 0.0% flops 151 x 64 x 858 7296706560 0.0% 100.0% 0.0% flops 169 x 64 x 64 7858126848 0.0% 100.0% 0.0% flops 173 x 64 x 96 8044118016 0.0% 100.0% 0.0% flops 173 x 96 x 64 8044118016 0.0% 100.0% 0.0% flops 173 x 64 x 849 8272112640 0.0% 100.0% 0.0% flops 178 x 64 x 96 8276606976 0.0% 100.0% 0.0% flops 178 x 96 x 64 8276606976 0.0% 100.0% 0.0% flops 173 x 64 x 853 8311086080 0.0% 100.0% 0.0% flops 173 x 64 x 858 8359802880 0.0% 100.0% 0.0% flops 178 x 64 x 849 8511191040 0.0% 100.0% 0.0% flops 178 x 64 x 853 8551290880 0.0% 100.0% 0.0% flops 178 x 64 x 858 8601415680 0.0% 100.0% 0.0% flops 32 x 64 x 64 10085203968 0.0% 100.0% 0.0% flops 218 x 64 x 64 10136518656 0.0% 100.0% 0.0% flops 64 x 64 x 849 10710712320 0.0% 100.0% 0.0% flops 64 x 64 x 853 10761175040 0.0% 100.0% 0.0% flops 64 x 64 x 858 10824253440 0.0% 100.0% 0.0% flops 271 x 96 x 64 12600901632 0.0% 100.0% 0.0% flops 271 x 64 x 96 12600901632 0.0% 100.0% 0.0% flops 209 x 64 x 64 12957384704 0.0% 100.0% 0.0% flops 271 x 64 x 849 12958049280 0.0% 100.0% 0.0% flops 64 x 96 x 96 12966690816 0.0% 100.0% 0.0% flops 271 x 64 x 853 13019100160 0.0% 100.0% 0.0% flops 271 x 64 x 858 13095413760 0.0% 100.0% 0.0% flops 151 x 96 x 96 21063499776 0.0% 100.0% 0.0% flops 151 x 96 x 849 21660503040 0.0% 100.0% 0.0% flops 151 x 96 x 853 21762554880 0.0% 100.0% 0.0% flops 151 x 96 x 858 21890119680 0.0% 100.0% 0.0% flops 169 x 96 x 64 23574380544 0.0% 100.0% 0.0% flops 169 x 64 x 96 23574380544 0.0% 100.0% 0.0% flops 173 x 96 x 96 24132354048 0.0% 100.0% 0.0% flops 169 x 64 x 849 24242549760 0.0% 100.0% 0.0% flops 169 x 64 x 853 24356766720 0.0% 100.0% 0.0% flops 169 x 64 x 858 24499537920 0.0% 100.0% 0.0% flops 173 x 96 x 849 24816337920 0.0% 100.0% 0.0% flops 178 x 96 x 96 24829820928 0.0% 100.0% 0.0% flops 173 x 96 x 853 24933258240 0.0% 100.0% 0.0% flops 231 x 64 x 64 25062309888 0.0% 100.0% 0.0% flops 173 x 96 x 858 25079408640 0.0% 100.0% 0.0% flops 178 x 96 x 849 25533573120 0.0% 100.0% 0.0% flops 178 x 96 x 853 25653872640 0.0% 100.0% 0.0% flops 178 x 96 x 858 25804247040 0.0% 100.0% 0.0% flops 32 x 96 x 64 30255611904 0.0% 100.0% 0.0% flops 32 x 64 x 96 30255611904 0.0% 100.0% 0.0% flops 218 x 64 x 96 30409555968 0.0% 100.0% 0.0% flops 218 x 96 x 64 30409555968 0.0% 100.0% 0.0% flops 218 x 64 x 849 31271454720 0.0% 100.0% 0.0% flops 218 x 64 x 853 31418787840 0.0% 100.0% 0.0% flops 218 x 64 x 858 31602954240 0.0% 100.0% 0.0% flops 64 x 96 x 849 32132136960 0.0% 100.0% 0.0% flops 64 x 96 x 853 32283525120 0.0% 100.0% 0.0% flops 64 x 96 x 858 32472760320 0.0% 100.0% 0.0% flops 271 x 96 x 96 37802704896 0.0% 100.0% 0.0% flops 209 x 96 x 64 38872154112 0.0% 100.0% 0.0% flops 209 x 64 x 96 38872154112 0.0% 100.0% 0.0% flops 271 x 96 x 849 38874147840 0.0% 100.0% 0.0% flops 271 x 96 x 853 39057300480 0.0% 100.0% 0.0% flops 271 x 96 x 858 39286241280 0.0% 100.0% 0.0% flops 209 x 64 x 849 39973908480 0.0% 100.0% 0.0% flops 209 x 64 x 853 40162242560 0.0% 100.0% 0.0% flops 209 x 64 x 858 40397660160 0.0% 100.0% 0.0% flops 9 x 9 x 64 67295121408 0.0% 100.0% 0.0% flops 169 x 96 x 96 70723141632 0.0% 100.0% 0.0% flops 169 x 96 x 849 72727649280 0.0% 100.0% 0.0% flops 169 x 96 x 853 73070300160 0.0% 100.0% 0.0% flops 169 x 96 x 858 73498613760 0.0% 100.0% 0.0% flops 32 x 64 x 849 74974986240 0.0% 100.0% 0.0% flops 231 x 64 x 96 75186929664 0.0% 100.0% 0.0% flops 231 x 96 x 64 75186929664 0.0% 100.0% 0.0% flops 32 x 64 x 853 75328225280 0.0% 100.0% 0.0% flops 32 x 64 x 858 75769774080 0.0% 100.0% 0.0% flops 231 x 64 x 849 77317954560 0.0% 100.0% 0.0% flops 231 x 64 x 853 77682232320 0.0% 100.0% 0.0% flops 231 x 64 x 858 78137579520 0.0% 100.0% 0.0% flops 9 x 22 x 64 87348856320 0.0% 100.0% 0.0% flops 22 x 9 x 64 87510601728 0.0% 100.0% 0.0% flops 32 x 96 x 96 90766835712 0.0% 100.0% 0.0% flops 218 x 96 x 96 91228667904 0.0% 100.0% 0.0% flops 218 x 96 x 849 93814364160 0.0% 100.0% 0.0% flops 218 x 96 x 853 94256363520 0.0% 100.0% 0.0% flops 218 x 96 x 858 94808862720 0.0% 100.0% 0.0% flops 22 x 22 x 64 113395453952 0.0% 100.0% 0.0% flops 209 x 96 x 96 116616462336 0.0% 100.0% 0.0% flops 209 x 96 x 849 119921725440 0.0% 100.0% 0.0% flops 209 x 96 x 853 120486727680 0.0% 100.0% 0.0% flops 209 x 96 x 858 121192980480 0.0% 100.0% 0.0% flops 9 x 9 x 96 201885364224 0.0% 100.0% 0.0% flops 32 x 96 x 849 224924958720 0.0% 100.0% 0.0% flops 231 x 96 x 96 225560788992 0.0% 100.0% 0.0% flops 32 x 96 x 853 225984675840 0.0% 100.0% 0.0% flops 32 x 96 x 858 227309322240 0.0% 100.0% 0.0% flops 231 x 96 x 849 231953863680 0.0% 100.0% 0.0% flops 231 x 96 x 853 233046696960 0.0% 100.0% 0.0% flops 231 x 96 x 858 234412738560 0.0% 100.0% 0.0% flops 9 x 22 x 96 262046568960 0.0% 100.0% 0.0% flops 22 x 9 x 96 262531805184 0.0% 100.0% 0.0% flops 22 x 22 x 96 340186361856 0.0% 100.0% 0.0% flops 9 x 32 x 9 1138002296832 0.0% 100.0% 0.0% flops 22 x 32 x 9 1485592289280 0.0% 100.0% 0.0% flops 9 x 32 x 22 1485592289280 0.0% 100.0% 0.0% flops 22 x 32 x 22 1910442074112 0.0% 100.0% 0.0% flops inhomo. stacks 629542526976 100.0% 0.0% 0.0% flops total 12.909090E+12 4.9% 95.1% 0.0% flops max/rank 451.741913E+09 9.0% 91.0% 0.0% matmuls inhomo. stacks 62964 100.0% 0.0% 0.0% matmuls total 562477038 0.0% 100.0% 0.0% number of processed stacks 2497330 2.5% 97.5% 0.0% average stack size 1.0 231.0 0.0 marketing flops 15.646547E+12 ------------------------------------------------------------------------------- # multiplications 2055 max memory usage/rank 350.121984E+06 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 739800 MPI messages size (bytes): total size 565.223162E+09 min size 0.000000E+00 max size 5.889312E+06 average size 764.021562E+03 MPI breakdown and total messages size (bytes): size <= 128 5610 0 128 < size <= 8192 0 0 8192 < size <= 32768 37270 1217658880 32768 < size <= 131072 295520 18156748800 131072 < size <= 4194304 335340 195349708800 4194304 < size <= 16777216 66060 350485115840 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 115 12. MP_Allreduce 11133 25. MP_Alltoall 8043 133840. MP_ISend 49276 334981. MP_IRecv 49276 330552. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3473 66308. MP_Allreduce 9775 564. MP_Sync 52 MP_Alltoall 1717 3889290. MP_SendRecv 7700 27936. MP_ISendRecv 7700 27936. MP_Wait 17864 MP_ISend 8316 219755. MP_IRecv 8316 219755. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.036 0.087 75.428 75.433 qs_mol_dyn_low 1 2.0 0.013 0.024 74.881 74.890 qs_forces 11 3.9 0.005 0.012 74.716 74.759 qs_energies 11 4.9 0.001 0.002 70.301 70.358 scf_env_do_scf 11 5.9 0.001 0.003 63.011 63.022 scf_env_do_scf_inner_loop 99 6.5 0.003 0.023 54.890 54.896 velocity_verlet 10 3.0 0.002 0.006 44.387 44.390 dbcsr_multiply_generic 2055 12.4 0.142 0.147 28.707 29.547 qs_scf_new_mos 99 7.5 0.001 0.001 25.664 26.086 qs_scf_loop_do_ot 99 8.5 0.001 0.001 25.663 26.086 ot_scf_mini 99 9.5 0.003 0.003 23.752 24.156 rebuild_ks_matrix 110 8.3 0.001 0.001 20.615 21.145 qs_ks_build_kohn_sham_matrix 110 9.3 0.016 0.018 20.614 21.144 multiply_cannon 2055 13.4 0.217 0.235 18.692 20.574 qs_ks_update_qs_env 110 7.6 0.001 0.001 18.223 18.690 multiply_cannon_loop 2055 14.4 0.157 0.167 16.376 18.208 qs_rho_update_rho_low 110 7.6 0.001 0.001 14.449 14.539 calculate_rho_elec 110 8.6 0.065 0.073 14.449 14.538 multiply_cannon_multrec 12330 15.4 12.007 14.397 12.026 14.417 ot_mini 99 10.5 0.001 0.002 12.853 13.299 sum_up_and_integrate 110 10.3 0.003 0.006 12.881 12.908 integrate_v_rspace 110 11.3 0.003 0.004 12.834 12.863 mp_waitall_1 141068 16.5 6.206 10.030 6.206 10.030 grid_collocate_task_list 110 9.6 9.512 9.802 9.512 9.802 grid_integrate_task_list 110 12.3 8.606 8.977 8.606 8.977 init_scf_loop 11 6.9 0.000 0.000 8.057 8.061 qs_ot_get_derivative 99 11.5 0.001 0.002 6.988 7.404 make_m2s 4110 13.4 0.082 0.088 6.727 7.175 make_images 4110 14.4 0.663 0.844 5.848 6.290 apply_preconditioner_dbcsr 110 12.6 0.000 0.001 5.592 6.033 apply_single 110 13.6 0.001 0.001 5.592 6.032 ot_diis_step 99 11.5 0.014 0.016 5.828 5.829 qs_ot_get_p 110 10.4 0.001 0.001 5.202 5.730 multiply_cannon_metrocomm3 12330 15.4 0.040 0.043 1.750 5.282 prepare_preconditioner 11 7.9 0.000 0.000 5.181 5.226 make_preconditioner 11 8.9 0.000 0.000 5.181 5.225 init_scf_run 11 5.9 0.000 0.006 4.892 4.892 scf_env_initial_rho_setup 11 6.9 0.000 0.005 4.891 4.892 make_full_inverse_cholesky 11 9.9 0.000 0.000 4.720 4.795 fft_wrap_pw1pw2 1111 11.6 0.017 0.028 4.595 4.662 wfi_extrapolate 11 7.9 0.001 0.001 4.237 4.237 fft_wrap_pw1pw2_140 451 12.1 0.212 0.230 3.978 4.113 density_rs2pw 110 9.6 0.006 0.006 3.852 4.113 make_images_data 4110 15.4 0.056 0.065 3.328 3.903 hybrid_alltoall_any 4261 16.3 0.184 1.014 3.180 3.854 fft3d_ps 1111 13.6 1.578 1.666 3.639 3.710 mp_alltoall_d11v 2046 13.8 3.188 3.449 3.188 3.449 mp_sum_l 10179 13.1 2.043 3.241 2.043 3.241 potential_pw2rs 110 12.3 0.014 0.017 3.025 3.054 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.696 2.762 qs_ot_p2m_diag 48 11.0 0.034 0.048 2.693 2.710 multiply_cannon_metrocomm1 12330 15.4 0.046 0.048 1.322 2.527 qs_ot_get_derivative_diag 47 12.0 0.001 0.001 2.251 2.488 cp_dbcsr_syevd 48 12.0 0.003 0.003 2.473 2.474 mp_allgather_i34 2055 14.4 0.974 2.412 0.974 2.412 qs_ot_get_derivative_taylor 52 13.0 0.002 0.002 2.213 2.408 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.003 2.353 2.359 cp_fm_cholesky_invert 11 10.9 2.316 2.326 2.316 2.326 calculate_dm_sparse 110 9.5 0.001 0.001 2.159 2.301 transfer_rs2pw 451 10.6 0.006 0.007 1.733 2.207 mp_waitany 8316 13.8 1.631 2.118 1.631 2.118 mp_irecv_dv 30428 16.1 0.922 2.047 0.922 2.047 multiply_cannon_metrocomm4 10275 15.4 0.039 0.042 0.899 1.912 qs_energies_init_hamiltonians 11 5.9 0.001 0.003 1.822 1.866 mp_alltoall_z22v 1111 15.6 1.724 1.843 1.724 1.843 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 1.692 1.826 transfer_pw2rs 451 13.1 0.006 0.007 1.756 1.769 cp_fm_diag_elpa 48 13.0 0.000 0.000 1.764 1.766 cp_fm_diag_elpa_base 48 14.0 1.721 1.724 1.758 1.758 dbcsr_complete_redistribute 325 12.2 0.369 0.435 1.572 1.659 copy_dbcsr_to_fm 151 11.3 0.003 0.003 1.502 1.644 transfer_rs2pw_140 121 11.5 0.236 0.273 1.150 1.609 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.374 1.603 mp_sum_d 3893 11.9 1.161 1.543 1.161 1.543 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="206", plot="h2o_128_md", label="(4n/9r/4t)", y=75.433000, yerr=0.000000 PlotPoint: name="207", plot="h2o_128_md_mem", label="(4n/9r/4t)", y=330.818182, yerr=4.281422 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/16/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 830472192 0.0% 100.0% 0.0% flops 22 x 32 x 32 1015021568 0.0% 100.0% 0.0% flops 129 x 64 x 409 1485749760 0.0% 100.0% 0.0% flops 151 x 64 x 409 1739133440 0.0% 100.0% 0.0% flops 174 x 64 x 409 2004034560 0.0% 100.0% 0.0% flops 200 x 64 x 409 2303488000 0.0% 100.0% 0.0% flops 209 x 64 x 409 2407144960 0.0% 100.0% 0.0% flops 218 x 64 x 409 2510801920 0.0% 100.0% 0.0% flops 160 x 64 x 409 3685580800 0.0% 100.0% 0.0% flops 129 x 64 x 32 3998810112 0.0% 100.0% 0.0% flops 129 x 64 x 64 3998810112 0.0% 100.0% 0.0% flops 187 x 64 x 409 4307522560 0.0% 100.0% 0.0% flops 129 x 96 x 409 4457249280 0.0% 100.0% 0.0% flops 129 x 64 x 418 4555330560 0.0% 100.0% 0.0% flops 151 x 64 x 32 4680777728 0.0% 100.0% 0.0% flops 151 x 64 x 64 4680777728 0.0% 100.0% 0.0% flops 64 x 64 x 409 5159813120 0.0% 100.0% 0.0% flops 151 x 96 x 409 5217400320 0.0% 100.0% 0.0% flops 231 x 64 x 409 5321057280 0.0% 100.0% 0.0% flops 151 x 64 x 418 5332208640 0.0% 100.0% 0.0% flops 174 x 64 x 32 5393743872 0.0% 100.0% 0.0% flops 174 x 64 x 64 5393743872 0.0% 100.0% 0.0% flops 64 x 64 x 32 5762973696 0.0% 100.0% 0.0% flops 64 x 64 x 64 5762973696 0.0% 100.0% 0.0% flops 174 x 96 x 409 6012103680 0.0% 100.0% 0.0% flops 262 x 64 x 409 6035138560 0.0% 100.0% 0.0% flops 174 x 64 x 418 6144399360 0.0% 100.0% 0.0% flops 200 x 64 x 32 6199705600 0.0% 100.0% 0.0% flops 200 x 64 x 64 6199705600 0.0% 100.0% 0.0% flops 209 x 64 x 32 6478692352 0.0% 100.0% 0.0% flops 209 x 64 x 64 6478692352 0.0% 100.0% 0.0% flops 218 x 64 x 32 6757679104 0.0% 100.0% 0.0% flops 218 x 64 x 64 6757679104 0.0% 100.0% 0.0% flops 200 x 96 x 409 6910464000 0.0% 100.0% 0.0% flops 200 x 64 x 418 7062528000 0.0% 100.0% 0.0% flops 209 x 96 x 409 7221434880 0.0% 100.0% 0.0% flops 209 x 64 x 418 7380341760 0.0% 100.0% 0.0% flops 218 x 96 x 409 7532405760 0.0% 100.0% 0.0% flops 218 x 64 x 418 7698155520 0.0% 100.0% 0.0% flops 160 x 64 x 32 9919528960 0.0% 100.0% 0.0% flops 160 x 64 x 64 9919528960 0.0% 100.0% 0.0% flops 129 x 64 x 431 10959674880 0.0% 100.0% 0.0% flops 160 x 96 x 409 11056742400 0.0% 100.0% 0.0% flops 160 x 64 x 418 11300044800 0.0% 100.0% 0.0% flops 187 x 64 x 32 11593449472 0.0% 100.0% 0.0% flops 187 x 64 x 64 11593449472 0.0% 100.0% 0.0% flops 129 x 96 x 64 11996430336 0.0% 100.0% 0.0% flops 129 x 96 x 32 11996430336 0.0% 100.0% 0.0% flops 151 x 64 x 431 12828766720 0.0% 100.0% 0.0% flops 187 x 96 x 409 12922567680 0.0% 100.0% 0.0% flops 187 x 64 x 418 13206927360 0.0% 100.0% 0.0% flops 129 x 96 x 418 13665991680 0.0% 100.0% 0.0% flops 151 x 96 x 64 14042333184 0.0% 100.0% 0.0% flops 151 x 96 x 32 14042333184 0.0% 100.0% 0.0% flops 231 x 64 x 32 14321319936 0.0% 100.0% 0.0% flops 231 x 64 x 64 14321319936 0.0% 100.0% 0.0% flops 174 x 64 x 431 14782817280 0.0% 100.0% 0.0% flops 32 x 64 x 409 15479439360 0.0% 100.0% 0.0% flops 64 x 96 x 409 15479439360 0.0% 100.0% 0.0% flops 64 x 64 x 418 15820062720 0.0% 100.0% 0.0% flops 231 x 96 x 409 15963171840 0.0% 100.0% 0.0% flops 151 x 96 x 418 15996625920 0.0% 100.0% 0.0% flops 174 x 96 x 64 16181231616 0.0% 100.0% 0.0% flops 174 x 96 x 32 16181231616 0.0% 100.0% 0.0% flops 262 x 64 x 32 16243228672 0.0% 100.0% 0.0% flops 262 x 64 x 64 16243228672 0.0% 100.0% 0.0% flops 231 x 64 x 418 16314439680 0.0% 100.0% 0.0% flops 200 x 64 x 431 16991744000 0.0% 100.0% 0.0% flops 32 x 64 x 32 17288921088 0.0% 100.0% 0.0% flops 32 x 64 x 64 17288921088 0.0% 100.0% 0.0% flops 64 x 96 x 64 17288921088 0.0% 100.0% 0.0% flops 64 x 96 x 32 17288921088 0.0% 100.0% 0.0% flops 209 x 64 x 431 17756372480 0.0% 100.0% 0.0% flops 262 x 96 x 409 18105415680 0.0% 100.0% 0.0% flops 174 x 96 x 418 18433198080 0.0% 100.0% 0.0% flops 262 x 64 x 418 18503823360 0.0% 100.0% 0.0% flops 218 x 64 x 431 18521000960 0.0% 100.0% 0.0% flops 200 x 96 x 64 18599116800 0.0% 100.0% 0.0% flops 200 x 96 x 32 18599116800 0.0% 100.0% 0.0% flops 209 x 96 x 64 19436077056 0.0% 100.0% 0.0% flops 209 x 96 x 32 19436077056 0.0% 100.0% 0.0% flops 218 x 96 x 64 20273037312 0.0% 100.0% 0.0% flops 218 x 96 x 32 20273037312 0.0% 100.0% 0.0% flops 200 x 96 x 418 21187584000 0.0% 100.0% 0.0% flops 209 x 96 x 418 22141025280 0.0% 100.0% 0.0% flops 218 x 96 x 418 23094466560 0.0% 100.0% 0.0% flops 160 x 64 x 431 27186790400 0.0% 100.0% 0.0% flops 160 x 96 x 64 29758586880 0.0% 100.0% 0.0% flops 160 x 96 x 32 29758586880 0.0% 100.0% 0.0% flops 187 x 64 x 431 31774561280 0.0% 100.0% 0.0% flops 129 x 96 x 431 32879024640 0.0% 100.0% 0.0% flops 160 x 96 x 418 33900134400 0.0% 100.0% 0.0% flops 187 x 96 x 64 34780348416 0.0% 100.0% 0.0% flops 187 x 96 x 32 34780348416 0.0% 100.0% 0.0% flops 64 x 64 x 431 38061506560 0.0% 100.0% 0.0% flops 151 x 96 x 431 38486300160 0.0% 100.0% 0.0% flops 231 x 64 x 431 39250928640 0.0% 100.0% 0.0% flops 187 x 96 x 418 39620782080 0.0% 100.0% 0.0% flops 231 x 96 x 64 42963959808 0.0% 100.0% 0.0% flops 231 x 96 x 32 42963959808 0.0% 100.0% 0.0% flops 174 x 96 x 431 44348451840 0.0% 100.0% 0.0% flops 262 x 64 x 431 44518369280 0.0% 100.0% 0.0% flops 32 x 96 x 409 46438318080 0.0% 100.0% 0.0% flops 32 x 64 x 418 47460188160 0.0% 100.0% 0.0% flops 64 x 96 x 418 47460188160 0.0% 100.0% 0.0% flops 262 x 96 x 64 48729686016 0.0% 100.0% 0.0% flops 262 x 96 x 32 48729686016 0.0% 100.0% 0.0% flops 231 x 96 x 418 48943319040 0.0% 100.0% 0.0% flops 200 x 96 x 431 50975232000 0.0% 100.0% 0.0% flops 32 x 96 x 64 51866763264 0.0% 100.0% 0.0% flops 32 x 96 x 32 51866763264 0.0% 100.0% 0.0% flops 209 x 96 x 431 53269117440 0.0% 100.0% 0.0% flops 262 x 96 x 418 55511470080 0.0% 100.0% 0.0% flops 218 x 96 x 431 55563002880 0.0% 100.0% 0.0% flops 160 x 96 x 431 81560371200 0.0% 100.0% 0.0% flops 187 x 96 x 431 95323683840 0.0% 100.0% 0.0% flops 32 x 64 x 431 114184519680 0.0% 100.0% 0.0% flops 64 x 96 x 431 114184519680 0.0% 100.0% 0.0% flops 231 x 96 x 431 117752785920 0.0% 100.0% 0.0% flops 262 x 96 x 431 133555107840 0.0% 100.0% 0.0% flops 9 x 9 x 64 134590242816 0.0% 100.0% 0.0% flops 9 x 9 x 32 134590242816 0.0% 100.0% 0.0% flops 32 x 96 x 418 142380564480 0.0% 100.0% 0.0% flops 9 x 22 x 64 174697712640 0.0% 100.0% 0.0% flops 9 x 22 x 32 174697712640 0.0% 100.0% 0.0% flops 22 x 9 x 64 175021203456 0.0% 100.0% 0.0% flops 22 x 9 x 32 175021203456 0.0% 100.0% 0.0% flops 22 x 22 x 64 226790907904 0.0% 100.0% 0.0% flops 22 x 22 x 32 226790907904 0.0% 100.0% 0.0% flops 32 x 96 x 431 342553559040 0.0% 100.0% 0.0% flops 9 x 32 x 9 1138002296832 0.0% 100.0% 0.0% flops 22 x 32 x 9 1485592289280 0.0% 100.0% 0.0% flops 9 x 32 x 22 1485592289280 0.0% 100.0% 0.0% flops 22 x 32 x 22 1910442074112 0.0% 100.0% 0.0% flops inhomo. stacks 2770741067776 100.0% 0.0% 0.0% flops total 13.483664E+12 20.5% 79.5% 0.0% flops max/rank 678.224219E+09 23.3% 76.7% 0.0% matmuls inhomo. stacks 461340 100.0% 0.0% 0.0% matmuls total 609143868 0.1% 99.9% 0.0% number of processed stacks 4707072 9.8% 90.2% 0.0% average stack size 1.0 143.4 0.0 marketing flops 15.646302E+12 ------------------------------------------------------------------------------- # multiplications 2055 max memory usage/rank 453.808128E+06 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 937080 MPI messages size (bytes): total size 523.723932E+09 min size 0.000000E+00 max size 4.537280E+06 average size 558.889250E+03 MPI breakdown and total messages size (bytes): size <= 128 6996 0 128 < size <= 8192 264 2162688 8192 < size <= 32768 304932 8165326848 32768 < size <= 131072 110640 6338641920 131072 < size <= 4194304 489498 400769458320 4194304 < size <= 16777216 24750 108449092400 16777216 < size 0 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3473 66419. MP_Allreduce 9774 603. MP_Sync 52 MP_Alltoall 1496 5863162. MP_SendRecv 5060 43184. MP_ISendRecv 5060 43184. MP_Wait 20042 MP_ISend 13376 163145. MP_IRecv 13376 163145. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.017 0.037 86.565 86.652 qs_mol_dyn_low 1 2.0 0.011 0.020 86.193 86.278 qs_forces 11 3.9 0.003 0.003 86.061 86.151 qs_energies 11 4.9 0.001 0.002 81.597 81.684 scf_env_do_scf 11 5.9 0.001 0.003 73.588 73.655 scf_env_do_scf_inner_loop 99 6.5 0.003 0.027 59.476 59.533 velocity_verlet 10 3.0 0.002 0.006 52.373 52.428 dbcsr_multiply_generic 2055 12.4 0.159 0.163 32.637 33.311 qs_scf_new_mos 99 7.5 0.001 0.001 28.762 29.200 qs_scf_loop_do_ot 99 8.5 0.001 0.001 28.761 29.199 ot_scf_mini 99 9.5 0.003 0.003 26.702 27.139 rebuild_ks_matrix 110 8.3 0.001 0.001 21.550 22.020 qs_ks_build_kohn_sham_matrix 110 9.3 0.014 0.015 21.550 22.019 multiply_cannon 2055 13.4 0.240 0.265 20.049 21.861 qs_ks_update_qs_env 110 7.6 0.001 0.001 19.154 19.589 multiply_cannon_loop 2055 14.4 0.277 0.291 17.648 19.124 multiply_cannon_multrec 24660 15.4 13.567 16.063 13.587 16.084 qs_rho_update_rho_low 110 7.6 0.001 0.001 15.249 15.269 calculate_rho_elec 110 8.6 0.097 0.102 15.249 15.269 ot_mini 99 10.5 0.001 0.001 14.655 15.148 init_scf_loop 11 6.9 0.000 0.000 14.060 14.076 sum_up_and_integrate 110 10.3 0.002 0.003 13.049 13.064 integrate_v_rspace 110 11.3 0.003 0.004 13.001 13.014 prepare_preconditioner 11 7.9 0.000 0.000 10.994 11.038 make_preconditioner 11 8.9 0.000 0.000 10.994 11.038 grid_collocate_task_list 110 9.6 10.354 10.835 10.354 10.835 make_full_inverse_cholesky 11 9.9 0.000 0.000 9.049 10.540 make_m2s 4110 13.4 0.101 0.104 9.087 9.484 grid_integrate_task_list 110 12.3 8.947 9.236 8.947 9.236 mp_waitall_1 121746 16.5 6.025 8.357 6.025 8.357 qs_ot_get_derivative 99 11.5 0.001 0.001 7.661 8.115 make_images 4110 14.4 0.889 1.141 7.741 8.082 apply_preconditioner_dbcsr 110 12.6 0.000 0.000 6.540 7.082 apply_single 110 13.6 0.001 0.001 6.540 7.082 ot_diis_step 99 11.5 0.014 0.015 6.953 6.960 qs_ot_get_p 110 10.4 0.001 0.001 5.261 5.738 init_scf_run 11 5.9 0.000 0.004 5.390 5.395 scf_env_initial_rho_setup 11 6.9 0.000 0.004 5.390 5.395 cp_fm_upper_to_full 70 14.2 3.688 5.238 3.688 5.238 make_images_data 4110 15.4 0.064 0.069 4.334 4.795 fft_wrap_pw1pw2 1111 11.6 0.016 0.022 4.639 4.687 wfi_extrapolate 11 7.9 0.001 0.001 4.622 4.627 dbcsr_complete_redistribute 325 12.2 0.511 0.597 3.204 4.476 hybrid_alltoall_any 4261 16.3 0.146 0.484 3.818 4.469 fft_wrap_pw1pw2_140 451 12.1 0.236 0.246 4.080 4.123 density_rs2pw 110 9.6 0.006 0.006 3.797 4.121 multiply_cannon_metrocomm3 24660 15.4 0.049 0.051 1.882 3.833 copy_fm_to_dbcsr 174 11.2 0.001 0.001 2.471 3.723 fft3d_ps 1111 13.6 1.580 1.682 3.630 3.669 mp_alltoall_i22 605 13.7 1.975 3.308 1.975 3.308 mp_sum_l 10179 13.1 1.933 3.240 1.933 3.240 transfer_fm_to_dbcsr 11 9.9 0.002 0.008 1.930 3.157 mp_alltoall_d11v 2046 13.8 2.929 3.115 2.929 3.115 potential_pw2rs 110 12.3 0.018 0.019 2.891 2.898 cp_fm_cholesky_invert 11 10.9 2.865 2.879 2.865 2.879 qs_ot_p2m_diag 48 11.0 0.050 0.062 2.823 2.846 qs_ot_get_derivative_diag 47 12.0 0.001 0.002 2.527 2.833 multiply_cannon_metrocomm4 20550 15.4 0.075 0.080 1.499 2.806 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.721 2.761 mp_irecv_dv 62702 16.1 1.414 2.719 1.414 2.719 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 2.681 2.707 cp_dbcsr_syevd 48 12.0 0.003 0.003 2.545 2.552 qs_ot_get_derivative_taylor 52 13.0 0.002 0.002 2.293 2.448 calculate_dm_sparse 110 9.5 0.001 0.001 2.324 2.418 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 2.013 2.129 mp_allgather_i34 2055 14.4 0.868 2.071 0.868 2.071 qs_energies_init_hamiltonians 11 5.9 0.001 0.004 2.016 2.027 mp_waitany 13376 13.8 1.596 1.965 1.596 1.965 cp_fm_diag_elpa 48 13.0 0.000 0.000 1.909 1.913 cp_fm_diag_elpa_base 48 14.0 1.798 1.831 1.905 1.907 transfer_rs2pw 451 10.6 0.006 0.006 1.550 1.878 mp_alltoall_z22v 1111 15.6 1.750 1.844 1.750 1.844 copy_dbcsr_to_fm 151 11.3 0.003 0.003 1.601 1.741 dbcsr_dot_sd 1091 11.9 0.479 0.513 1.272 1.741 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="208", plot="h2o_128_md", label="(4n/6r/6t)", y=86.652000, yerr=0.000000 PlotPoint: name="209", plot="h2o_128_md_mem", label="(4n/6r/6t)", y=428.000000, yerr=10.081486 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/17/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 830472192 0.0% 100.0% 0.0% flops 22 x 32 x 32 1015021568 0.0% 100.0% 0.0% flops 89 x 128 x 128 22070951936 0.0% 100.0% 0.0% flops 89 x 128 x 1280 51327795200 0.0% 100.0% 0.0% flops 80 x 128 x 128 59517173760 0.0% 100.0% 0.0% flops 151 x 128 x 128 74892443648 0.0% 100.0% 0.0% flops 182 x 128 x 128 90267713536 0.0% 100.0% 0.0% flops 32 x 128 x 128 92207579136 0.0% 100.0% 0.0% flops 64 x 128 x 128 92207579136 0.0% 100.0% 0.0% flops 80 x 128 x 1280 138412032000 0.0% 100.0% 0.0% flops 151 x 128 x 1280 174168473600 0.0% 100.0% 0.0% flops 160 x 128 x 128 198390579200 0.0% 100.0% 0.0% flops 182 x 128 x 1280 209924915200 0.0% 100.0% 0.0% flops 129 x 128 x 128 255923847168 0.0% 100.0% 0.0% flops 9 x 9 x 128 269180485632 0.0% 100.0% 0.0% flops 9 x 22 x 128 349395425280 0.0% 100.0% 0.0% flops 22 x 9 x 128 350042406912 0.0% 100.0% 0.0% flops 22 x 22 x 128 453581815808 0.0% 100.0% 0.0% flops 160 x 128 x 1280 461373440000 0.0% 100.0% 0.0% flops 32 x 128 x 1280 516738252800 0.0% 100.0% 0.0% flops 64 x 128 x 1280 516738252800 0.0% 100.0% 0.0% flops 129 x 128 x 1280 595171737600 0.0% 100.0% 0.0% flops 9 x 32 x 9 1138002296832 0.0% 100.0% 0.0% flops 22 x 32 x 9 1485592289280 0.0% 100.0% 0.0% flops 9 x 32 x 22 1485592289280 0.0% 100.0% 0.0% flops 22 x 32 x 22 1910442074112 0.0% 100.0% 0.0% flops inhomo. stacks 2199488299008 100.0% 0.0% 0.0% flops total 13.192496E+12 16.7% 83.3% 0.0% flops max/rank 860.633972E+09 17.4% 82.6% 0.0% matmuls inhomo. stacks 139920 100.0% 0.0% 0.0% matmuls total 546715604 0.0% 100.0% 0.0% number of processed stacks 1575496 8.9% 91.1% 0.0% average stack size 1.0 380.7 0.0 marketing flops 15.646297E+12 ------------------------------------------------------------------------------- # multiplications 2055 max memory usage/rank 613.265408E+06 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 197280 MPI messages size (bytes): total size 339.125567E+09 min size 0.000000E+00 max size 13.107200E+06 average size 1.719006E+06 MPI breakdown and total messages size (bytes): size <= 128 1452 0 128 < size <= 8192 0 0 8192 < size <= 32768 132 4325376 32768 < size <= 131072 88656 11620319232 131072 < size <= 4194304 89424 117209825280 4194304 < size <= 16777216 17616 210291069504 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 27 12. MP_Allreduce 10957 25. MP_Alltoall 8043 258767. MP_ISend 32836 652428. MP_IRecv 32836 652812. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3473 66417. MP_Allreduce 9774 644. MP_Sync 52 MP_Alltoall 1496 8504061. MP_SendRecv 3300 54848. MP_ISendRecv 3300 54848. MP_Wait 13926 MP_ISend 9240 278857. MP_IRecv 9240 278857. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.051 0.073 77.899 77.903 qs_mol_dyn_low 1 2.0 0.011 0.023 77.497 77.507 qs_forces 11 3.9 0.005 0.014 77.334 77.344 qs_energies 11 4.9 0.007 0.026 72.908 72.921 scf_env_do_scf 11 5.9 0.001 0.001 64.706 64.708 scf_env_do_scf_inner_loop 99 6.5 0.004 0.016 54.155 54.157 velocity_verlet 10 3.0 0.002 0.006 46.303 46.310 dbcsr_multiply_generic 2055 12.4 0.158 0.167 27.689 27.832 qs_scf_new_mos 99 7.5 0.001 0.002 25.204 25.338 qs_scf_loop_do_ot 99 8.5 0.001 0.001 25.203 25.337 ot_scf_mini 99 9.5 0.003 0.004 23.347 23.454 rebuild_ks_matrix 110 8.3 0.001 0.001 19.978 20.073 qs_ks_build_kohn_sham_matrix 110 9.3 0.016 0.024 19.977 20.073 multiply_cannon 2055 13.4 0.222 0.230 18.030 19.529 qs_ks_update_qs_env 110 7.6 0.001 0.001 17.756 17.868 multiply_cannon_loop 2055 14.4 0.151 0.191 16.283 16.633 qs_rho_update_rho_low 110 7.6 0.001 0.001 14.837 14.852 calculate_rho_elec 110 8.6 0.144 0.145 14.836 14.851 multiply_cannon_multrec 8220 15.4 13.223 14.225 13.243 14.245 ot_mini 99 10.5 0.001 0.001 12.685 12.813 sum_up_and_integrate 110 10.3 0.002 0.003 12.372 12.405 integrate_v_rspace 110 11.3 0.003 0.003 12.331 12.364 init_scf_loop 11 6.9 0.001 0.005 10.491 10.494 grid_collocate_task_list 110 9.6 9.631 10.107 9.631 10.107 grid_integrate_task_list 110 12.3 8.345 8.572 8.345 8.572 prepare_preconditioner 11 7.9 0.000 0.000 7.663 7.693 make_preconditioner 11 8.9 0.001 0.002 7.663 7.693 make_m2s 4110 13.4 0.078 0.087 6.993 7.308 qs_ot_get_derivative 99 11.5 0.001 0.002 6.909 7.024 make_full_inverse_cholesky 11 9.9 0.000 0.000 6.885 7.017 make_images 4110 14.4 0.899 0.974 5.674 6.004 mp_waitall_1 103326 16.6 4.883 5.739 4.883 5.739 ot_diis_step 99 11.5 0.021 0.027 5.738 5.739 apply_preconditioner_dbcsr 110 12.6 0.000 0.001 5.504 5.599 apply_single 110 13.6 0.000 0.001 5.504 5.598 qs_ot_get_p 110 10.4 0.001 0.001 5.374 5.488 fft_wrap_pw1pw2 1111 11.6 0.014 0.018 5.326 5.468 init_scf_run 11 5.9 0.000 0.003 5.254 5.255 scf_env_initial_rho_setup 11 6.9 0.001 0.003 5.254 5.255 density_rs2pw 110 9.6 0.005 0.005 4.192 5.106 fft_wrap_pw1pw2_140 451 12.1 0.289 0.303 4.592 4.846 wfi_extrapolate 11 7.9 0.002 0.004 4.445 4.445 fft3d_ps 1111 13.6 1.750 1.976 4.233 4.308 make_images_data 4110 15.4 0.054 0.062 3.228 3.680 hybrid_alltoall_any 4261 16.3 0.309 1.232 3.201 3.589 cp_fm_cholesky_invert 11 10.9 3.284 3.296 3.284 3.296 mp_alltoall_d11v 2046 13.8 2.786 3.169 2.786 3.169 qs_ot_p2m_diag 48 11.0 0.073 0.081 3.097 3.102 cp_dbcsr_syevd 48 12.0 0.003 0.004 2.800 2.802 potential_pw2rs 110 12.3 0.021 0.023 2.770 2.777 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 2.622 2.647 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 2.619 2.625 dbcsr_complete_redistribute 325 12.2 0.797 0.930 2.423 2.601 transfer_rs2pw 451 10.6 0.006 0.006 1.584 2.390 mp_waitany 9240 13.8 1.597 2.349 1.597 2.349 mp_alltoall_z22v 1111 15.6 2.134 2.348 2.134 2.348 qs_energies_init_hamiltonians 11 5.9 0.001 0.004 2.318 2.327 qs_ot_get_derivative_diag 47 12.0 0.001 0.001 2.202 2.291 calculate_dm_sparse 110 9.5 0.001 0.001 2.133 2.216 cp_fm_diag_elpa 48 13.0 0.000 0.000 2.173 2.174 cp_fm_diag_elpa_base 48 14.0 2.082 2.117 2.171 2.171 mp_allgather_i34 2055 14.4 0.789 2.165 0.789 2.165 copy_dbcsr_to_fm 151 11.3 0.003 0.003 1.965 2.085 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 1.912 2.067 cp_fm_cholesky_decompose 22 10.9 1.976 2.014 1.976 2.014 transfer_rs2pw_140 121 11.5 0.184 0.217 1.161 1.949 qs_ot_get_derivative_taylor 52 13.0 0.002 0.002 1.904 1.938 mp_sum_l 10179 13.1 1.225 1.683 1.225 1.683 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 1.487 1.613 multiply_cannon_metrocomm1 8220 15.4 0.029 0.030 1.091 1.605 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="210", plot="h2o_128_md", label="(4n/4r/9t)", y=77.903000, yerr=0.000000 PlotPoint: name="211", plot="h2o_128_md_mem", label="(4n/4r/9t)", y=567.545455, yerr=22.685052 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/18/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 830472192 0.0% 100.0% 0.0% flops 22 x 32 x 32 1015021568 0.0% 100.0% 0.0% flops 71 x 64 x 64 1100447744 0.0% 100.0% 0.0% flops 64 x 64 x 64 1440743424 0.0% 100.0% 0.0% flops 71 x 64 x 840 1679462400 0.0% 100.0% 0.0% flops 71 x 64 x 849 1697456640 0.0% 100.0% 0.0% flops 129 x 64 x 64 1999405056 0.0% 100.0% 0.0% flops 129 x 64 x 840 3051417600 0.0% 100.0% 0.0% flops 129 x 64 x 849 3084111360 0.0% 100.0% 0.0% flops 71 x 64 x 96 3301343232 0.0% 100.0% 0.0% flops 71 x 96 x 64 3301343232 0.0% 100.0% 0.0% flops 71 x 64 x 858 3430901760 0.0% 100.0% 0.0% flops 64 x 96 x 64 4322230272 0.0% 100.0% 0.0% flops 64 x 64 x 96 4322230272 0.0% 100.0% 0.0% flops 71 x 96 x 840 5038387200 0.0% 100.0% 0.0% flops 71 x 96 x 849 5092369920 0.0% 100.0% 0.0% flops 64 x 64 x 840 5298585600 0.0% 100.0% 0.0% flops 64 x 64 x 849 5355356160 0.0% 100.0% 0.0% flops 129 x 96 x 64 5998215168 0.0% 100.0% 0.0% flops 129 x 64 x 96 5998215168 0.0% 100.0% 0.0% flops 129 x 64 x 858 6233610240 0.0% 100.0% 0.0% flops 209 x 64 x 64 6478692352 0.0% 100.0% 0.0% flops 222 x 64 x 64 6881673216 0.0% 100.0% 0.0% flops 129 x 96 x 840 9154252800 0.0% 100.0% 0.0% flops 129 x 96 x 849 9252334080 0.0% 100.0% 0.0% flops 209 x 64 x 840 9887539200 0.0% 100.0% 0.0% flops 71 x 96 x 96 9904029696 0.0% 100.0% 0.0% flops 209 x 64 x 849 9993477120 0.0% 100.0% 0.0% flops 32 x 64 x 64 10085203968 0.0% 100.0% 0.0% flops 71 x 96 x 858 10292705280 0.0% 100.0% 0.0% flops 222 x 64 x 840 10502553600 0.0% 100.0% 0.0% flops 222 x 64 x 849 10615080960 0.0% 100.0% 0.0% flops 231 x 64 x 64 10740989952 0.0% 100.0% 0.0% flops 64 x 64 x 858 10824253440 0.0% 100.0% 0.0% flops 240 x 64 x 64 11159470080 0.0% 100.0% 0.0% flops 64 x 96 x 96 12966690816 0.0% 100.0% 0.0% flops 64 x 96 x 840 15895756800 0.0% 100.0% 0.0% flops 64 x 96 x 849 16066068480 0.0% 100.0% 0.0% flops 231 x 64 x 840 16392499200 0.0% 100.0% 0.0% flops 231 x 64 x 849 16568133120 0.0% 100.0% 0.0% flops 240 x 64 x 840 17031168000 0.0% 100.0% 0.0% flops 240 x 64 x 849 17213644800 0.0% 100.0% 0.0% flops 129 x 96 x 96 17994645504 0.0% 100.0% 0.0% flops 129 x 96 x 858 18700830720 0.0% 100.0% 0.0% flops 209 x 96 x 64 19436077056 0.0% 100.0% 0.0% flops 209 x 64 x 96 19436077056 0.0% 100.0% 0.0% flops 209 x 64 x 858 20198830080 0.0% 100.0% 0.0% flops 222 x 64 x 96 20645019648 0.0% 100.0% 0.0% flops 222 x 96 x 64 20645019648 0.0% 100.0% 0.0% flops 222 x 64 x 858 21455216640 0.0% 100.0% 0.0% flops 209 x 96 x 840 29662617600 0.0% 100.0% 0.0% flops 209 x 96 x 849 29980431360 0.0% 100.0% 0.0% flops 32 x 96 x 64 30255611904 0.0% 100.0% 0.0% flops 32 x 64 x 96 30255611904 0.0% 100.0% 0.0% flops 222 x 96 x 840 31507660800 0.0% 100.0% 0.0% flops 222 x 96 x 849 31845242880 0.0% 100.0% 0.0% flops 231 x 64 x 96 32222969856 0.0% 100.0% 0.0% flops 231 x 96 x 64 32222969856 0.0% 100.0% 0.0% flops 64 x 96 x 858 32472760320 0.0% 100.0% 0.0% flops 240 x 96 x 64 33478410240 0.0% 100.0% 0.0% flops 240 x 64 x 96 33478410240 0.0% 100.0% 0.0% flops 231 x 64 x 858 33487534080 0.0% 100.0% 0.0% flops 240 x 64 x 858 34792243200 0.0% 100.0% 0.0% flops 32 x 64 x 840 37090099200 0.0% 100.0% 0.0% flops 32 x 64 x 849 37487493120 0.0% 100.0% 0.0% flops 231 x 96 x 840 49177497600 0.0% 100.0% 0.0% flops 231 x 96 x 849 49704399360 0.0% 100.0% 0.0% flops 240 x 96 x 840 51093504000 0.0% 100.0% 0.0% flops 240 x 96 x 849 51640934400 0.0% 100.0% 0.0% flops 209 x 96 x 96 58308231168 0.0% 100.0% 0.0% flops 209 x 96 x 858 60596490240 0.0% 100.0% 0.0% flops 222 x 96 x 96 61935058944 0.0% 100.0% 0.0% flops 222 x 96 x 858 64365649920 0.0% 100.0% 0.0% flops 9 x 9 x 64 67295121408 0.0% 100.0% 0.0% flops 32 x 64 x 858 75769774080 0.0% 100.0% 0.0% flops 9 x 22 x 64 87348856320 0.0% 100.0% 0.0% flops 22 x 9 x 64 87510601728 0.0% 100.0% 0.0% flops 32 x 96 x 96 90766835712 0.0% 100.0% 0.0% flops 231 x 96 x 96 96668909568 0.0% 100.0% 0.0% flops 240 x 96 x 96 100435230720 0.0% 100.0% 0.0% flops 231 x 96 x 858 100462602240 0.0% 100.0% 0.0% flops 240 x 96 x 858 104376729600 0.0% 100.0% 0.0% flops 32 x 96 x 840 111270297600 0.0% 100.0% 0.0% flops 32 x 96 x 849 112462479360 0.0% 100.0% 0.0% flops 22 x 22 x 64 113395453952 0.0% 100.0% 0.0% flops 9 x 9 x 96 201885364224 0.0% 100.0% 0.0% flops 32 x 96 x 858 227309322240 0.0% 100.0% 0.0% flops 9 x 22 x 96 262046568960 0.0% 100.0% 0.0% flops 22 x 9 x 96 262531805184 0.0% 100.0% 0.0% flops 22 x 22 x 96 340186361856 0.0% 100.0% 0.0% flops 9 x 32 x 9 1138002296832 0.0% 100.0% 0.0% flops 22 x 32 x 9 1485592289280 0.0% 100.0% 0.0% flops 9 x 32 x 22 1485592289280 0.0% 100.0% 0.0% flops 22 x 32 x 22 1910442074112 0.0% 100.0% 0.0% flops inhomo. stacks 3766099836928 100.0% 0.0% 0.0% flops total 13.644522E+12 27.6% 72.4% 0.0% flops max/rank 1.282868E+12 28.0% 72.0% 0.0% matmuls inhomo. stacks 336996 100.0% 0.0% 0.0% matmuls total 562477038 0.1% 99.9% 0.0% number of processed stacks 2521520 13.4% 86.6% 0.0% average stack size 1.0 257.3 0.0 marketing flops 15.646302E+12 ------------------------------------------------------------------------------- # multiplications 2055 max memory usage/rank 788.811776E+06 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 197280 MPI messages size (bytes): total size 482.240758E+09 min size 0.000000E+00 max size 17.688240E+06 average size 2.444448E+06 MPI breakdown and total messages size (bytes): size <= 128 1386 0 128 < size <= 8192 0 0 8192 < size <= 32768 4706 153485312 32768 < size <= 131072 50860 4081582080 131072 < size <= 4194304 118308 127519948800 4194304 < size <= 16777216 15420 235141755840 16777216 < size 6600 115343360000 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3473 66888. MP_Allreduce 9774 769. MP_Sync 52 MP_Alltoall 1496 10935805. MP_SendRecv 2420 70608. MP_ISendRecv 2420 70608. MP_Wait 11198 MP_ISend 7392 401928. MP_IRecv 7392 401928. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.134 0.223 114.129 114.135 qs_mol_dyn_low 1 2.0 0.022 0.036 113.485 113.495 qs_forces 11 3.9 0.014 0.030 113.285 113.333 qs_energies 11 4.9 0.005 0.010 107.895 107.948 scf_env_do_scf 11 5.9 0.001 0.003 97.050 97.070 scf_env_do_scf_inner_loop 99 6.5 0.005 0.025 74.459 74.475 velocity_verlet 10 3.0 0.003 0.006 72.191 72.203 dbcsr_multiply_generic 2055 12.4 0.168 0.175 47.221 48.501 qs_scf_new_mos 99 7.5 0.001 0.001 39.880 40.484 qs_scf_loop_do_ot 99 8.5 0.001 0.001 39.879 40.483 ot_scf_mini 99 9.5 0.004 0.004 37.788 38.480 multiply_cannon 2055 13.4 0.243 0.260 31.068 33.288 multiply_cannon_loop 2055 14.4 0.205 0.222 28.759 31.285 rebuild_ks_matrix 110 8.3 0.001 0.001 25.012 25.860 qs_ks_build_kohn_sham_matrix 110 9.3 0.016 0.021 25.011 25.859 mp_waitall_1 102446 16.6 16.307 24.752 16.307 24.752 qs_ks_update_qs_env 110 7.6 0.001 0.001 22.318 23.091 multiply_cannon_multrec 12330 15.4 15.057 23.045 15.084 23.072 init_scf_loop 11 6.9 0.002 0.005 22.521 22.534 ot_mini 99 10.5 0.001 0.001 20.903 21.588 multiply_cannon_metrocomm3 12330 15.4 0.043 0.044 11.218 19.339 prepare_preconditioner 11 7.9 0.000 0.000 18.473 18.573 make_preconditioner 11 8.9 0.001 0.001 18.473 18.572 make_full_inverse_cholesky 11 9.9 0.000 0.000 15.164 17.754 qs_rho_update_rho_low 110 7.6 0.001 0.001 16.632 16.711 calculate_rho_elec 110 8.6 0.190 0.199 16.631 16.710 sum_up_and_integrate 110 10.3 0.003 0.005 13.039 13.059 integrate_v_rspace 110 11.3 0.003 0.004 12.994 13.013 grid_collocate_task_list 110 9.6 11.538 11.692 11.538 11.692 apply_preconditioner_dbcsr 110 12.6 0.001 0.001 10.589 11.353 apply_single 110 13.6 0.000 0.001 10.588 11.352 make_m2s 4110 13.4 0.090 0.093 10.742 11.260 ot_diis_step 99 11.5 0.020 0.021 11.224 11.225 qs_ot_get_derivative 99 11.5 0.001 0.002 9.643 10.331 cp_fm_upper_to_full 70 14.2 7.077 9.656 7.077 9.656 make_images 4110 14.4 1.173 1.390 9.015 9.486 grid_integrate_task_list 110 12.3 9.162 9.247 9.162 9.247 qs_ot_get_p 110 10.4 0.001 0.001 6.853 8.013 multiply_cannon_metrocomm4 10275 15.4 0.044 0.048 2.127 7.890 mp_irecv_dv 29063 16.1 2.050 7.812 2.050 7.812 init_scf_run 11 5.9 0.001 0.005 7.252 7.253 scf_env_initial_rho_setup 11 6.9 0.001 0.002 7.251 7.252 dbcsr_complete_redistribute 325 12.2 0.910 1.153 5.081 6.931 make_images_data 4110 15.4 0.058 0.062 5.800 6.763 hybrid_alltoall_any 4261 16.3 0.194 0.427 5.635 6.692 wfi_extrapolate 11 7.9 0.004 0.014 6.239 6.240 copy_fm_to_dbcsr 174 11.2 0.001 0.002 3.963 5.824 fft_wrap_pw1pw2 1111 11.6 0.016 0.018 5.220 5.267 mp_alltoall_i22 605 13.7 3.220 5.162 3.220 5.162 transfer_fm_to_dbcsr 11 9.9 0.002 0.006 3.289 5.129 mp_sum_l 10179 13.1 3.434 5.099 3.434 5.099 cp_fm_cholesky_invert 11 10.9 4.643 4.655 4.643 4.655 fft_wrap_pw1pw2_140 451 12.1 0.351 0.360 4.565 4.616 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 4.236 4.254 fft3d_ps 1111 13.6 1.837 1.953 4.069 4.129 density_rs2pw 110 9.6 0.005 0.006 3.838 4.003 mp_alltoall_d11v 2046 13.8 3.425 3.617 3.425 3.617 qs_ot_get_derivative_taylor 52 13.0 0.002 0.002 2.949 3.585 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 3.174 3.494 qs_ot_get_derivative_diag 47 12.0 0.001 0.002 3.075 3.416 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.211 3.352 qs_ot_p2m_diag 48 11.0 0.096 0.115 3.229 3.250 dbcsr_dot_sd 1091 11.9 0.815 0.889 2.152 3.148 copy_dbcsr_to_fm 151 11.3 0.003 0.003 2.604 3.017 qs_energies_init_hamiltonians 11 5.9 0.003 0.005 2.884 2.938 cp_dbcsr_syevd 48 12.0 0.003 0.004 2.903 2.904 mp_sum_d 3891 11.9 1.681 2.662 1.681 2.662 potential_pw2rs 110 12.3 0.021 0.022 2.648 2.653 calculate_dm_sparse 110 9.5 0.001 0.001 2.384 2.561 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="212", plot="h2o_128_md", label="(4n/3r/12t)", y=114.135000, yerr=0.000000 PlotPoint: name="213", plot="h2o_128_md_mem", label="(4n/3r/12t)", y=723.636364, yerr=24.510750 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/19/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 830472192 0.0% 100.0% 0.0% flops 22 x 32 x 32 1015021568 0.0% 100.0% 0.0% flops 142 x 128 x 128 70428655616 0.0% 100.0% 0.0% flops 129 x 128 x 128 127961923584 0.0% 100.0% 0.0% flops 142 x 128 x 1280 163787571200 0.0% 100.0% 0.0% flops 32 x 128 x 128 184415158272 0.0% 100.0% 0.0% flops 160 x 128 x 128 238068695040 0.0% 100.0% 0.0% flops 138 x 128 x 128 239556624384 0.0% 100.0% 0.0% flops 9 x 9 x 128 269180485632 0.0% 100.0% 0.0% flops 129 x 128 x 1280 297585868800 0.0% 100.0% 0.0% flops 9 x 22 x 128 349395425280 0.0% 100.0% 0.0% flops 22 x 9 x 128 350042406912 0.0% 100.0% 0.0% flops 22 x 22 x 128 453581815808 0.0% 100.0% 0.0% flops 160 x 128 x 1280 553648128000 0.0% 100.0% 0.0% flops 138 x 128 x 1280 557108428800 0.0% 100.0% 0.0% flops 32 x 128 x 1280 1033476505600 0.0% 100.0% 0.0% flops 9 x 32 x 9 1138002296832 0.0% 100.0% 0.0% flops 22 x 32 x 9 1485592289280 0.0% 100.0% 0.0% flops 9 x 32 x 22 1485592289280 0.0% 100.0% 0.0% flops 22 x 32 x 22 1910442074112 0.0% 100.0% 0.0% flops inhomo. stacks 2588749070336 100.0% 0.0% 0.0% flops total 13.498461E+12 19.2% 80.8% 0.0% flops max/rank 1.745688E+12 20.7% 79.3% 0.0% matmuls inhomo. stacks 158576 100.0% 0.0% 0.0% matmuls total 546784212 0.0% 100.0% 0.0% number of processed stacks 1648032 9.6% 90.4% 0.0% average stack size 1.0 367.0 0.0 marketing flops 15.646297E+12 ------------------------------------------------------------------------------- # multiplications 2055 max memory usage/rank 1.954480E+09 # max total images/rank 2 # max 3D layers 1 # MPI messages exchanged 82200 MPI messages size (bytes): total size 297.640985E+09 min size 0.000000E+00 max size 26.214400E+06 average size 3.620936E+06 MPI breakdown and total messages size (bytes): size <= 128 572 0 128 < size <= 8192 0 0 8192 < size <= 32768 44 1441792 32768 < size <= 131072 18560 2432696320 131072 < size <= 4194304 54216 84915781632 4194304 < size <= 16777216 0 0 16777216 < size 8808 210291069504 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3462 67098. MP_Allreduce 9752 812. MP_Sync 52 MP_Alltoall 1474 16505187. MP_SendRecv 2310 360267. MP_ISendRecv 2310 360267. MP_Wait 5214 MP_ISend 2420 1187840. MP_IRecv 2420 1187840. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.057 0.075 116.680 116.682 qs_mol_dyn_low 1 2.0 0.020 0.028 116.203 116.217 qs_forces 11 3.9 0.011 0.014 116.053 116.057 qs_energies 11 4.9 0.008 0.010 110.763 110.780 scf_env_do_scf 11 5.9 0.001 0.004 99.769 99.770 velocity_verlet 10 3.0 0.002 0.003 75.731 75.738 scf_env_do_scf_inner_loop 99 6.5 0.006 0.034 67.462 67.464 dbcsr_multiply_generic 2055 12.4 0.178 0.187 33.461 33.854 init_scf_loop 11 6.9 0.005 0.009 32.222 32.224 qs_scf_new_mos 99 7.5 0.001 0.001 29.785 30.199 qs_scf_loop_do_ot 99 8.5 0.001 0.001 29.784 30.198 prepare_preconditioner 11 7.9 0.000 0.000 28.845 28.901 make_preconditioner 11 8.9 0.000 0.001 28.845 28.901 make_full_inverse_cholesky 11 9.9 0.000 0.000 22.883 28.118 ot_scf_mini 99 9.5 0.003 0.004 27.551 27.826 rebuild_ks_matrix 110 8.3 0.001 0.001 23.225 23.456 qs_ks_build_kohn_sham_matrix 110 9.3 0.023 0.027 23.224 23.455 multiply_cannon 2055 13.4 0.255 0.271 21.201 23.107 qs_rho_update_rho_low 110 7.6 0.001 0.001 21.565 21.609 calculate_rho_elec 110 8.6 0.279 0.280 21.564 21.609 qs_ks_update_qs_env 110 7.6 0.001 0.001 20.848 21.029 multiply_cannon_loop 2055 14.4 0.136 0.142 19.384 19.855 cp_fm_upper_to_full 70 14.2 12.345 17.685 12.345 17.685 grid_collocate_task_list 110 9.6 15.821 16.032 15.821 16.032 ot_mini 99 10.5 0.002 0.002 14.896 15.175 multiply_cannon_multrec 8220 15.4 12.585 13.941 12.612 13.968 sum_up_and_integrate 110 10.3 0.002 0.003 13.403 13.412 integrate_v_rspace 110 11.3 0.003 0.003 13.351 13.360 mp_waitall_1 84994 16.7 9.471 12.686 9.471 12.686 dbcsr_complete_redistribute 325 12.2 1.270 1.481 8.111 11.246 make_m2s 4110 13.4 0.079 0.080 9.225 9.862 copy_fm_to_dbcsr 174 11.2 0.001 0.002 6.648 9.779 grid_integrate_task_list 110 12.3 9.416 9.477 9.416 9.477 transfer_fm_to_dbcsr 11 9.9 0.002 0.002 5.947 9.062 mp_alltoall_i22 605 13.7 5.607 8.893 5.607 8.893 multiply_cannon_metrocomm3 8220 15.4 0.026 0.026 5.361 8.186 make_images 4110 14.4 1.480 1.555 7.244 7.906 qs_ot_get_derivative 99 11.5 0.002 0.003 7.534 7.815 ot_diis_step 99 11.5 0.025 0.026 7.326 7.327 apply_preconditioner_dbcsr 110 12.6 0.001 0.001 7.076 7.181 apply_single 110 13.6 0.000 0.000 7.075 7.181 init_scf_run 11 5.9 0.001 0.005 6.873 6.874 scf_env_initial_rho_setup 11 6.9 0.002 0.002 6.872 6.874 cp_fm_cholesky_invert 11 10.9 6.400 6.409 6.400 6.409 qs_ot_get_p 110 10.4 0.001 0.001 5.799 6.165 fft_wrap_pw1pw2 1111 11.6 0.016 0.019 5.915 5.940 wfi_extrapolate 11 7.9 0.002 0.005 5.864 5.865 hybrid_alltoall_any 4261 16.3 0.409 0.912 4.364 5.437 fft_wrap_pw1pw2_140 451 12.1 0.500 0.506 5.233 5.261 make_images_data 4110 15.4 0.057 0.061 4.345 5.208 density_rs2pw 110 9.6 0.005 0.006 4.595 4.885 fft3d_ps 1111 13.6 2.452 2.499 4.487 4.519 qs_ot_p2m_diag 48 11.0 0.138 0.149 3.709 3.720 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 3.414 3.421 qs_energies_init_hamiltonians 11 5.9 0.003 0.005 3.356 3.360 multiply_cannon_metrocomm4 6165 15.4 0.025 0.027 1.175 3.358 cp_dbcsr_syevd 48 12.0 0.003 0.004 3.349 3.350 mp_irecv_dv 17923 16.3 1.125 3.278 1.125 3.278 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.049 3.099 potential_pw2rs 110 12.3 0.029 0.029 2.970 2.975 copy_dbcsr_to_fm 151 11.3 0.003 0.003 2.707 2.945 cp_fm_cholesky_decompose 22 10.9 2.875 2.890 2.875 2.890 mp_alltoall_d11v 2046 13.8 2.738 2.824 2.738 2.824 calculate_dm_sparse 110 9.5 0.001 0.001 2.567 2.747 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 2.516 2.719 cp_fm_diag_elpa 48 13.0 0.000 0.000 2.717 2.718 cp_fm_diag_elpa_base 48 14.0 2.261 2.367 2.715 2.715 qs_ot_get_derivative_diag 47 12.0 0.001 0.001 2.292 2.429 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="214", plot="h2o_128_md", label="(4n/2r/18t)", y=116.682000, yerr=0.000000 PlotPoint: name="215", plot="h2o_128_md_mem", label="(4n/2r/18t)", y=1590.454545, yerr=215.461181 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/20/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 830472192 0.0% 100.0% 0.0% flops 22 x 32 x 32 1015021568 0.0% 100.0% 0.0% flops 32 x 256 x 256 184415158272 0.0% 100.0% 0.0% flops 49 x 256 x 256 194422767616 0.0% 100.0% 0.0% flops 71 x 256 x 256 246500294656 0.0% 100.0% 0.0% flops 9 x 9 x 256 269180485632 0.0% 100.0% 0.0% flops 9 x 22 x 256 349395425280 0.0% 100.0% 0.0% flops 22 x 9 x 256 350042406912 0.0% 100.0% 0.0% flops 80 x 256 x 256 396781158400 0.0% 100.0% 0.0% flops 49 x 256 x 2560 452145971200 0.0% 100.0% 0.0% flops 22 x 22 x 256 453581815808 0.0% 100.0% 0.0% flops 71 x 256 x 2560 573256499200 0.0% 100.0% 0.0% flops 80 x 256 x 2560 922746880000 0.0% 100.0% 0.0% flops 32 x 256 x 2560 1033476505600 0.0% 100.0% 0.0% flops 9 x 32 x 9 1138002296832 0.0% 100.0% 0.0% flops 9 x 32 x 22 1485592289280 0.0% 100.0% 0.0% flops 22 x 32 x 9 1485592289280 0.0% 100.0% 0.0% flops 22 x 32 x 22 1910442074112 0.0% 100.0% 0.0% flops inhomo. stacks 1682398248960 100.0% 0.0% 0.0% flops total 13.129818E+12 12.8% 87.2% 0.0% flops max/rank 3.360079E+12 13.8% 86.2% 0.0% matmuls inhomo. stacks 46640 100.0% 0.0% 0.0% matmuls total 531185346 0.0% 100.0% 0.0% number of processed stacks 996500 4.7% 95.3% 0.0% average stack size 1.0 559.2 0.0 marketing flops 15.646297E+12 ------------------------------------------------------------------------------- # multiplications 2055 max memory usage/rank 8.107950E+09 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 16440 MPI messages size (bytes): total size 113.041801E+09 min size 0.000000E+00 max size 52.428800E+06 average size 6.876022E+06 MPI breakdown and total messages size (bytes): size <= 128 110 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 22 1441792 131072 < size <= 4194304 7388 3873439744 4194304 < size <= 16777216 7452 39069941760 16777216 < size 1468 70097023168 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 19 12. MP_Allreduce 10941 25. MP_Alltoall 8043 1122409. MP_ISend 16396 1767280. MP_IRecv 16396 1767297. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3462 67097. MP_Allreduce 9752 980. MP_Sync 52 MP_Alltoall 1474 32339426. MP_SendRecv 990 720533. MP_ISendRecv 990 720533. MP_Wait 2926 MP_ISend 1452 2662400. MP_IRecv 1452 2662400. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.046 0.061 137.884 137.887 qs_mol_dyn_low 1 2.0 0.019 0.020 137.234 137.246 qs_forces 11 3.9 0.014 0.014 137.099 137.106 qs_energies 11 4.9 0.007 0.008 128.983 128.991 scf_env_do_scf 11 5.9 0.002 0.005 112.681 112.687 velocity_verlet 10 3.0 0.001 0.001 87.375 87.390 scf_env_do_scf_inner_loop 99 6.5 0.008 0.024 87.190 87.194 dbcsr_multiply_generic 2055 12.4 0.205 0.211 45.726 46.157 qs_scf_new_mos 99 7.5 0.001 0.001 42.467 42.956 qs_scf_loop_do_ot 99 8.5 0.001 0.001 42.466 42.955 ot_scf_mini 99 9.5 0.004 0.005 39.514 39.963 multiply_cannon 2055 13.4 0.517 0.557 24.883 28.316 rebuild_ks_matrix 110 8.3 0.001 0.001 27.591 28.020 qs_ks_build_kohn_sham_matrix 110 9.3 0.015 0.015 27.590 28.019 qs_ks_update_qs_env 110 7.6 0.001 0.001 25.157 25.573 qs_rho_update_rho_low 110 7.6 0.001 0.001 25.362 25.374 calculate_rho_elec 110 8.6 0.489 0.490 25.361 25.373 init_scf_loop 11 6.9 0.006 0.006 25.321 25.323 ot_mini 99 10.5 0.001 0.002 22.008 22.463 multiply_cannon_loop 2055 14.4 0.141 0.145 20.986 22.068 prepare_preconditioner 11 7.9 0.000 0.000 20.662 20.686 make_preconditioner 11 8.9 0.001 0.001 20.662 20.686 make_full_inverse_cholesky 11 9.9 0.022 0.029 18.517 18.922 make_m2s 4110 13.4 0.074 0.075 16.007 18.225 grid_collocate_task_list 110 9.6 17.735 17.866 17.735 17.866 mp_waitall_1 67234 16.8 11.876 16.232 11.876 16.232 multiply_cannon_multrec 4110 15.4 14.674 15.121 14.743 15.194 sum_up_and_integrate 110 10.3 0.002 0.002 14.821 14.826 integrate_v_rspace 110 11.3 0.003 0.003 14.768 14.773 make_images 4110 14.4 1.970 2.049 11.291 13.154 qs_ot_get_derivative 99 11.5 0.002 0.002 11.128 11.582 ot_diis_step 99 11.5 0.033 0.035 10.862 10.863 apply_preconditioner_dbcsr 110 12.6 0.001 0.001 10.352 10.710 apply_single 110 13.6 0.000 0.000 10.351 10.710 hybrid_alltoall_any 4261 16.3 0.864 2.126 7.725 10.419 grid_integrate_task_list 110 12.3 9.842 9.912 9.842 9.912 cp_fm_cholesky_invert 11 10.9 9.702 9.711 9.702 9.711 make_images_data 4110 15.4 0.053 0.058 7.036 9.700 qs_ot_get_p 110 10.4 0.001 0.001 8.754 9.021 init_scf_run 11 5.9 0.001 0.004 8.843 8.845 scf_env_initial_rho_setup 11 6.9 0.002 0.002 8.842 8.843 fft_wrap_pw1pw2 1111 11.6 0.015 0.016 8.435 8.438 wfi_extrapolate 11 7.9 0.001 0.001 7.785 7.786 fft_wrap_pw1pw2_140 451 12.1 0.954 0.958 7.436 7.443 mp_allgather_i34 2055 14.4 2.445 6.634 2.445 6.634 fft3d_ps 1111 13.6 3.624 3.642 6.297 6.304 multiply_cannon_metrocomm3 4110 15.4 0.008 0.009 4.580 6.288 dbcsr_complete_redistribute 325 12.2 2.412 2.433 5.899 6.184 qs_energies_init_hamiltonians 11 5.9 0.001 0.001 6.104 6.106 density_rs2pw 110 9.6 0.005 0.005 5.850 5.974 qs_ot_p2m_diag 48 11.0 0.243 0.253 5.295 5.309 mp_alltoall_d11v 2046 13.8 5.138 5.263 5.138 5.263 dbcsr_make_dense_low 5207 15.5 0.054 0.055 4.757 5.156 copy_dbcsr_to_fm 151 11.3 0.003 0.003 4.958 5.087 make_dense_data 5207 16.5 4.325 4.627 4.684 5.081 cp_dbcsr_syevd 48 12.0 0.004 0.004 4.757 4.758 dbcsr_make_images_dense 3552 14.7 0.027 0.028 4.328 4.721 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 4.498 4.513 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 4.047 4.215 cp_fm_diag_elpa 48 13.0 0.000 0.000 4.007 4.008 cp_fm_diag_elpa_base 48 14.0 3.677 3.734 4.006 4.006 cp_fm_cholesky_decompose 22 10.9 3.745 3.809 3.745 3.809 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 3.702 3.730 qs_ot_get_derivative_diag 47 12.0 0.002 0.002 3.401 3.677 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 3.611 3.643 copy_fm_to_dbcsr 174 11.2 0.001 0.001 3.377 3.601 calculate_dm_sparse 110 9.5 0.001 0.001 3.457 3.501 potential_pw2rs 110 12.3 0.045 0.046 3.460 3.464 qs_env_update_s_mstruct 11 6.9 0.001 0.001 3.364 3.419 qs_ot_get_derivative_taylor 52 13.0 0.002 0.002 3.132 3.320 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 3.134 3.215 dbcsr_copy 1918 11.9 0.302 0.304 2.999 3.042 dbcsr_dot_sd 1091 11.9 2.182 2.191 2.698 2.987 qs_create_task_list 11 7.9 0.000 0.000 2.748 2.770 generate_qs_task_list 11 8.9 1.395 1.461 2.748 2.770 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="216", plot="h2o_128_md", label="(4n/1r/36t)", y=137.887000, yerr=0.000000 PlotPoint: name="217", plot="h2o_128_md_mem", label="(4n/1r/36t)", y=5903.181818, yerr=1468.925756 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/21/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 3321888768 0.0% 100.0% 0.0% flops 22 x 32 x 32 4060086272 0.0% 100.0% 0.0% flops 64 x 64 x 64 30366760960 0.0% 100.0% 0.0% flops 64 x 64 x 96 91100282880 0.0% 100.0% 0.0% flops 64 x 96 x 64 91100282880 0.0% 100.0% 0.0% flops 96 x 64 x 64 91100282880 0.0% 100.0% 0.0% flops 64 x 64 x 849 203420073984 0.0% 100.0% 0.0% flops 64 x 64 x 853 204378472448 0.0% 100.0% 0.0% flops 64 x 64 x 858 205576470528 0.0% 100.0% 0.0% flops 849 x 64 x 64 250602848256 0.0% 100.0% 0.0% flops 853 x 64 x 64 251783544832 0.0% 100.0% 0.0% flops 858 x 64 x 64 253259415552 0.0% 100.0% 0.0% flops 96 x 96 x 64 273300848640 0.0% 100.0% 0.0% flops 64 x 96 x 96 273300848640 0.0% 100.0% 0.0% flops 96 x 64 x 96 273300848640 0.0% 100.0% 0.0% flops 9 x 9 x 64 355059998208 0.0% 100.0% 0.0% flops 22 x 9 x 64 493014297600 0.0% 100.0% 0.0% flops 9 x 22 x 64 494442584064 0.0% 100.0% 0.0% flops 64 x 96 x 849 610260221952 0.0% 100.0% 0.0% flops 96 x 64 x 849 610260221952 0.0% 100.0% 0.0% flops 64 x 96 x 853 613135417344 0.0% 100.0% 0.0% flops 96 x 64 x 853 613135417344 0.0% 100.0% 0.0% flops 64 x 96 x 858 616729411584 0.0% 100.0% 0.0% flops 96 x 64 x 858 616729411584 0.0% 100.0% 0.0% flops 22 x 22 x 64 683571924992 0.0% 100.0% 0.0% flops 849 x 64 x 96 751808544768 0.0% 100.0% 0.0% flops 849 x 96 x 64 751808544768 0.0% 100.0% 0.0% flops 853 x 96 x 64 755350634496 0.0% 100.0% 0.0% flops 853 x 64 x 96 755350634496 0.0% 100.0% 0.0% flops 849 x 64 x 849 755814629376 0.0% 100.0% 0.0% flops 849 x 64 x 853 759375593472 0.0% 100.0% 0.0% flops 853 x 64 x 849 759375593472 0.0% 100.0% 0.0% flops 858 x 64 x 96 759778246656 0.0% 100.0% 0.0% flops 858 x 96 x 64 759778246656 0.0% 100.0% 0.0% flops 853 x 64 x 853 762953334784 0.0% 100.0% 0.0% flops 858 x 64 x 849 763826798592 0.0% 100.0% 0.0% flops 849 x 64 x 858 763826798592 0.0% 100.0% 0.0% flops 858 x 64 x 853 767425511424 0.0% 100.0% 0.0% flops 853 x 64 x 858 767425511424 0.0% 100.0% 0.0% flops 858 x 64 x 858 771923902464 0.0% 100.0% 0.0% flops 96 x 96 x 96 819902545920 0.0% 100.0% 0.0% flops 9 x 9 x 96 1065179994624 0.0% 100.0% 0.0% flops 22 x 9 x 96 1479042892800 0.0% 100.0% 0.0% flops 9 x 22 x 96 1483327752192 0.0% 100.0% 0.0% flops 96 x 96 x 849 1830780665856 0.0% 100.0% 0.0% flops 96 x 96 x 853 1839406252032 0.0% 100.0% 0.0% flops 96 x 96 x 858 1850188234752 0.0% 100.0% 0.0% flops 22 x 22 x 96 2050715774976 0.0% 100.0% 0.0% flops 849 x 96 x 96 2255425634304 0.0% 100.0% 0.0% flops 853 x 96 x 96 2266051903488 0.0% 100.0% 0.0% flops 849 x 96 x 849 2267443888128 0.0% 100.0% 0.0% flops 853 x 96 x 849 2278126780416 0.0% 100.0% 0.0% flops 849 x 96 x 853 2278126780416 0.0% 100.0% 0.0% flops 858 x 96 x 96 2279334739968 0.0% 100.0% 0.0% flops 853 x 96 x 853 2288860004352 0.0% 100.0% 0.0% flops 858 x 96 x 849 2291480395776 0.0% 100.0% 0.0% flops 849 x 96 x 858 2291480395776 0.0% 100.0% 0.0% flops 853 x 96 x 858 2302276534272 0.0% 100.0% 0.0% flops 858 x 96 x 853 2302276534272 0.0% 100.0% 0.0% flops 858 x 96 x 858 2315771707392 0.0% 100.0% 0.0% flops 9 x 32 x 9 5962613575680 0.0% 100.0% 0.0% flops 9 x 32 x 22 8325932617728 0.0% 100.0% 0.0% flops 22 x 32 x 9 8325932617728 0.0% 100.0% 0.0% flops 22 x 32 x 22 11452938371072 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 93.514751E+12 0.0% 100.0% 0.0% flops max/rank 743.796131E+09 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 3090542384 0.0% 100.0% 0.0% number of processed stacks 8143364 0.0% 100.0% 0.0% average stack size 0.0 379.5 0.0 marketing flops 144.582793E+12 ------------------------------------------------------------------------------- # multiplications 2507 max memory usage/rank 302.350336E+06 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 7942176 MPI messages size (bytes): total size 4.882017E+12 min size 0.000000E+00 max size 5.889312E+06 average size 614.695188E+03 MPI breakdown and total messages size (bytes): size <= 128 50820 0 128 < size <= 8192 0 0 8192 < size <= 32768 419100 13717209088 32768 < size <= 131072 3341184 205282344960 131072 < size <= 4194304 3928320 3481881758608 4194304 < size <= 16777216 202752 1181116006400 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 65 12. MP_Allreduce 13450 37. MP_Alltoall 9654 122761. MP_ISend 120292 295011. MP_IRecv 120292 293276. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3992 57803. MP_Allreduce 11060 761. MP_Sync 87 MP_Alltoall 2483 1586459. MP_SendRecv 36608 12928. MP_ISendRecv 36608 12928. MP_Wait 53492 MP_ISend 14748 95649. MP_IRecv 14748 95649. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.026 0.068 172.981 172.986 qs_mol_dyn_low 1 2.0 0.026 0.094 172.466 172.497 qs_forces 11 3.9 0.005 0.006 172.237 172.255 qs_energies 11 4.9 0.001 0.002 166.309 166.353 scf_env_do_scf 11 5.9 0.001 0.001 152.681 152.697 scf_env_do_scf_inner_loop 117 6.6 0.003 0.010 130.793 130.796 velocity_verlet 10 3.0 0.001 0.002 110.052 110.056 dbcsr_multiply_generic 2507 12.6 0.183 0.228 86.484 87.816 qs_scf_new_mos 117 7.6 0.001 0.001 78.600 80.180 qs_scf_loop_do_ot 117 8.6 0.001 0.001 78.599 80.179 ot_scf_mini 117 9.6 0.003 0.004 72.713 73.755 multiply_cannon 2507 13.6 0.234 0.283 64.196 68.844 multiply_cannon_loop 2507 14.6 0.318 0.467 59.880 64.267 multiply_cannon_multrec 30084 15.6 39.306 46.633 39.324 46.651 ot_mini 117 10.6 0.001 0.001 42.009 43.122 rebuild_ks_matrix 128 8.3 0.001 0.001 39.890 40.590 qs_ks_build_kohn_sham_matrix 128 9.3 0.016 0.020 39.890 40.589 qs_ks_update_qs_env 128 7.6 0.001 0.002 36.033 36.681 mp_waitall_1 319528 16.5 27.333 35.692 27.333 35.692 qs_ot_get_derivative 117 11.6 0.001 0.002 21.847 22.901 sum_up_and_integrate 128 10.3 0.002 0.004 22.219 22.269 integrate_v_rspace 128 11.3 0.004 0.005 22.160 22.223 init_scf_loop 11 6.9 0.000 0.000 21.786 21.788 qs_rho_update_rho_low 128 7.7 0.001 0.001 21.316 21.570 calculate_rho_elec 128 8.7 0.038 0.054 21.315 21.569 apply_preconditioner_dbcsr 128 12.6 0.000 0.001 19.997 21.129 apply_single 128 13.6 0.001 0.001 19.996 21.129 multiply_cannon_metrocomm3 30084 15.6 0.107 0.208 5.175 20.538 ot_diis_step 117 11.6 0.008 0.009 19.987 19.989 make_m2s 5014 13.6 0.115 0.146 17.279 18.168 prepare_preconditioner 11 7.9 0.000 0.000 16.769 16.895 make_preconditioner 11 8.9 0.000 0.000 16.769 16.895 qs_ot_get_p 128 10.4 0.001 0.001 14.991 16.286 make_images 5014 14.6 0.305 0.366 15.143 16.138 multiply_cannon_metrocomm1 30084 15.6 0.138 0.246 11.911 15.764 make_full_inverse_cholesky 11 9.9 0.000 0.000 14.543 14.861 grid_integrate_task_list 128 12.3 12.561 13.018 12.561 13.018 make_images_data 5014 15.6 0.071 0.108 9.515 11.734 hybrid_alltoall_any 5200 16.5 0.270 2.751 7.727 11.561 grid_collocate_task_list 128 9.7 10.691 11.188 10.691 11.188 qs_ot_get_derivative_diag 77 12.4 0.002 0.003 9.744 10.514 init_scf_run 11 5.9 0.000 0.001 10.293 10.295 scf_env_initial_rho_setup 11 6.9 0.000 0.001 10.293 10.295 qs_ot_p2m_diag 83 11.4 0.045 0.058 9.664 9.691 wfi_extrapolate 11 7.9 0.001 0.001 9.263 9.263 cp_dbcsr_syevd 83 12.4 0.005 0.006 9.009 9.013 density_rs2pw 128 9.7 0.006 0.010 8.078 8.502 fft_wrap_pw1pw2 1291 11.7 0.018 0.021 7.927 8.227 mp_alltoall_d11v 2415 14.1 7.650 8.108 7.650 8.108 fft3d_ps 1291 13.7 1.840 2.303 6.966 7.225 calculate_dm_sparse 128 9.5 0.001 0.001 6.532 7.167 mp_sum_l 12367 13.3 3.405 7.162 3.405 7.162 potential_pw2rs 128 12.3 0.007 0.011 6.862 6.899 fft_wrap_pw1pw2_140 523 12.2 0.212 0.231 6.569 6.898 cp_fm_cholesky_invert 11 10.9 6.740 6.751 6.740 6.751 mp_irecv_dv 71972 16.2 2.726 6.683 2.726 6.683 cp_fm_cholesky_decompose 22 10.9 6.324 6.387 6.324 6.387 multiply_cannon_metrocomm4 27577 15.6 0.103 0.210 2.428 6.271 mp_allgather_i34 2507 14.6 1.834 5.652 1.834 5.652 cp_fm_diag_elpa 83 13.4 0.000 0.001 5.538 5.543 cp_fm_diag_elpa_base 83 14.4 5.478 5.502 5.527 5.527 make_images_sizes 5014 15.6 0.007 0.014 2.697 5.353 mp_alltoall_i44 5014 16.6 2.690 5.347 2.690 5.347 mp_waitany 14748 13.8 4.459 5.171 4.459 5.171 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.003 4.944 4.977 dbcsr_complete_redistribute 395 12.7 0.496 0.634 4.495 4.946 transfer_rs2pw 523 10.6 0.007 0.008 4.259 4.666 mp_sum_d 4469 12.1 2.634 4.599 2.634 4.599 transfer_pw2rs 523 13.3 0.006 0.008 4.536 4.564 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 4.226 4.282 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 4.015 4.220 mp_alltoall_z22v 1291 15.7 3.723 4.023 3.723 4.023 qs_ot_get_derivative_taylor 40 13.0 0.001 0.002 3.566 3.849 copy_fm_to_dbcsr 209 11.7 0.001 0.002 3.354 3.740 dbcsr_dot_sd 1318 12.0 0.518 0.571 1.938 3.698 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="400", plot="h2o_256_md", label="(4n/36r/1t)", y=172.986000, yerr=0.000000 PlotPoint: name="401", plot="h2o_256_md_mem", label="(4n/36r/1t)", y=285.636364, yerr=4.395715 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/22/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 3321888768 0.0% 100.0% 0.0% flops 22 x 32 x 32 4060086272 0.0% 100.0% 0.0% flops 64 x 64 x 64 30366760960 0.0% 100.0% 0.0% flops 778 x 64 x 64 57411371008 0.0% 100.0% 0.0% flops 782 x 64 x 64 57706545152 0.0% 100.0% 0.0% flops 800 x 64 x 64 59034828800 0.0% 100.0% 0.0% flops 907 x 64 x 64 66930737152 0.0% 100.0% 0.0% flops 920 x 64 x 64 67890053120 0.0% 100.0% 0.0% flops 929 x 64 x 64 68554194944 0.0% 100.0% 0.0% flops 942 x 64 x 64 69513510912 0.0% 100.0% 0.0% flops 778 x 64 x 840 85658173440 0.0% 100.0% 0.0% flops 782 x 64 x 840 86098575360 0.0% 100.0% 0.0% flops 800 x 64 x 840 88080384000 0.0% 100.0% 0.0% flops 64 x 64 x 96 91100282880 0.0% 100.0% 0.0% flops 96 x 64 x 64 91100282880 0.0% 100.0% 0.0% flops 64 x 96 x 64 91100282880 0.0% 100.0% 0.0% flops 907 x 64 x 840 99861135360 0.0% 100.0% 0.0% flops 64 x 64 x 840 100631838720 0.0% 100.0% 0.0% flops 920 x 64 x 840 101292441600 0.0% 100.0% 0.0% flops 929 x 64 x 840 102283345920 0.0% 100.0% 0.0% flops 942 x 64 x 840 103714652160 0.0% 100.0% 0.0% flops 951 x 64 x 64 140355305472 0.0% 100.0% 0.0% flops 760 x 64 x 64 168249262080 0.0% 100.0% 0.0% flops 778 x 64 x 96 172234113024 0.0% 100.0% 0.0% flops 778 x 96 x 64 172234113024 0.0% 100.0% 0.0% flops 782 x 64 x 96 173119635456 0.0% 100.0% 0.0% flops 782 x 96 x 64 173119635456 0.0% 100.0% 0.0% flops 778 x 64 x 849 173151879168 0.0% 100.0% 0.0% flops 782 x 64 x 849 174042120192 0.0% 100.0% 0.0% flops 778 x 64 x 862 175803203584 0.0% 100.0% 0.0% flops 782 x 64 x 862 176707076096 0.0% 100.0% 0.0% flops 800 x 64 x 96 177104486400 0.0% 100.0% 0.0% flops 800 x 96 x 64 177104486400 0.0% 100.0% 0.0% flops 800 x 64 x 849 178048204800 0.0% 100.0% 0.0% flops 800 x 64 x 862 180774502400 0.0% 100.0% 0.0% flops 907 x 64 x 96 200792211456 0.0% 100.0% 0.0% flops 907 x 96 x 64 200792211456 0.0% 100.0% 0.0% flops 907 x 64 x 849 201862152192 0.0% 100.0% 0.0% flops 64 x 64 x 849 203420073984 0.0% 100.0% 0.0% flops 920 x 64 x 96 203670159360 0.0% 100.0% 0.0% flops 920 x 96 x 64 203670159360 0.0% 100.0% 0.0% flops 920 x 64 x 849 204755435520 0.0% 100.0% 0.0% flops 907 x 64 x 862 204953092096 0.0% 100.0% 0.0% flops 929 x 64 x 96 205662584832 0.0% 100.0% 0.0% flops 929 x 96 x 64 205662584832 0.0% 100.0% 0.0% flops 64 x 64 x 862 206534868992 0.0% 100.0% 0.0% flops 929 x 64 x 849 206758477824 0.0% 100.0% 0.0% flops 920 x 64 x 862 207890677760 0.0% 100.0% 0.0% flops 942 x 96 x 64 208540532736 0.0% 100.0% 0.0% flops 942 x 64 x 96 208540532736 0.0% 100.0% 0.0% flops 951 x 64 x 840 209411112960 0.0% 100.0% 0.0% flops 942 x 64 x 849 209651761152 0.0% 100.0% 0.0% flops 929 x 64 x 862 209924390912 0.0% 100.0% 0.0% flops 942 x 64 x 862 212861976576 0.0% 100.0% 0.0% flops 760 x 64 x 840 251029094400 0.0% 100.0% 0.0% flops 778 x 96 x 840 256974520320 0.0% 100.0% 0.0% flops 782 x 96 x 840 258295726080 0.0% 100.0% 0.0% flops 800 x 96 x 840 264241152000 0.0% 100.0% 0.0% flops 96 x 96 x 64 273300848640 0.0% 100.0% 0.0% flops 96 x 64 x 96 273300848640 0.0% 100.0% 0.0% flops 64 x 96 x 96 273300848640 0.0% 100.0% 0.0% flops 907 x 96 x 840 299583406080 0.0% 100.0% 0.0% flops 96 x 64 x 840 301895516160 0.0% 100.0% 0.0% flops 64 x 96 x 840 301895516160 0.0% 100.0% 0.0% flops 920 x 96 x 840 303877324800 0.0% 100.0% 0.0% flops 929 x 96 x 840 306850037760 0.0% 100.0% 0.0% flops 942 x 96 x 840 311143956480 0.0% 100.0% 0.0% flops 9 x 9 x 64 355060288512 0.0% 100.0% 0.0% flops 951 x 64 x 96 421065916416 0.0% 100.0% 0.0% flops 951 x 96 x 64 421065916416 0.0% 100.0% 0.0% flops 951 x 64 x 849 423309606912 0.0% 100.0% 0.0% flops 951 x 64 x 862 429791379456 0.0% 100.0% 0.0% flops 22 x 9 x 64 493014297600 0.0% 100.0% 0.0% flops 9 x 22 x 64 494442584064 0.0% 100.0% 0.0% flops 760 x 96 x 64 504747786240 0.0% 100.0% 0.0% flops 760 x 64 x 96 504747786240 0.0% 100.0% 0.0% flops 760 x 64 x 849 507437383680 0.0% 100.0% 0.0% flops 760 x 64 x 862 515207331840 0.0% 100.0% 0.0% flops 778 x 96 x 96 516702339072 0.0% 100.0% 0.0% flops 782 x 96 x 96 519358906368 0.0% 100.0% 0.0% flops 778 x 96 x 849 519455637504 0.0% 100.0% 0.0% flops 782 x 96 x 849 522126360576 0.0% 100.0% 0.0% flops 778 x 96 x 862 527409610752 0.0% 100.0% 0.0% flops 782 x 96 x 862 530121228288 0.0% 100.0% 0.0% flops 800 x 96 x 96 531313459200 0.0% 100.0% 0.0% flops 800 x 96 x 849 534144614400 0.0% 100.0% 0.0% flops 800 x 96 x 862 542323507200 0.0% 100.0% 0.0% flops 907 x 96 x 96 602376634368 0.0% 100.0% 0.0% flops 907 x 96 x 849 605586456576 0.0% 100.0% 0.0% flops 96 x 64 x 849 610260221952 0.0% 100.0% 0.0% flops 64 x 96 x 849 610260221952 0.0% 100.0% 0.0% flops 920 x 96 x 96 611010478080 0.0% 100.0% 0.0% flops 920 x 96 x 849 614266306560 0.0% 100.0% 0.0% flops 907 x 96 x 862 614859276288 0.0% 100.0% 0.0% flops 929 x 96 x 96 616987754496 0.0% 100.0% 0.0% flops 96 x 64 x 862 619604606976 0.0% 100.0% 0.0% flops 64 x 96 x 862 619604606976 0.0% 100.0% 0.0% flops 929 x 96 x 849 620275433472 0.0% 100.0% 0.0% flops 920 x 96 x 862 623672033280 0.0% 100.0% 0.0% flops 942 x 96 x 96 625621598208 0.0% 100.0% 0.0% flops 951 x 96 x 840 628233338880 0.0% 100.0% 0.0% flops 942 x 96 x 849 628955283456 0.0% 100.0% 0.0% flops 929 x 96 x 862 629773172736 0.0% 100.0% 0.0% flops 942 x 96 x 862 638585929728 0.0% 100.0% 0.0% flops 22 x 22 x 64 683571924992 0.0% 100.0% 0.0% flops 760 x 96 x 840 753087283200 0.0% 100.0% 0.0% flops 96 x 96 x 96 819902545920 0.0% 100.0% 0.0% flops 96 x 96 x 840 905686548480 0.0% 100.0% 0.0% flops 9 x 9 x 96 1065180865536 0.0% 100.0% 0.0% flops 951 x 96 x 96 1263197749248 0.0% 100.0% 0.0% flops 951 x 96 x 849 1269928820736 0.0% 100.0% 0.0% flops 951 x 96 x 862 1289374138368 0.0% 100.0% 0.0% flops 22 x 9 x 96 1479042892800 0.0% 100.0% 0.0% flops 9 x 22 x 96 1483327752192 0.0% 100.0% 0.0% flops 760 x 96 x 96 1514243358720 0.0% 100.0% 0.0% flops 760 x 96 x 849 1522312151040 0.0% 100.0% 0.0% flops 760 x 96 x 862 1545621995520 0.0% 100.0% 0.0% flops 96 x 96 x 849 1830780665856 0.0% 100.0% 0.0% flops 96 x 96 x 862 1858813820928 0.0% 100.0% 0.0% flops 22 x 22 x 96 2050715774976 0.0% 100.0% 0.0% flops 9 x 32 x 9 5962618884096 0.0% 100.0% 0.0% flops 22 x 32 x 9 8325932617728 0.0% 100.0% 0.0% flops 9 x 32 x 22 8325932617728 0.0% 100.0% 0.0% flops 22 x 32 x 22 11452938371072 0.0% 100.0% 0.0% flops inhomo. stacks 6920499888128 100.0% 0.0% 0.0% flops total 94.184293E+12 7.3% 92.7% 0.0% flops max/rank 1.486220E+12 7.4% 92.6% 0.0% matmuls inhomo. stacks 168480 100.0% 0.0% 0.0% matmuls total 3090543492 0.0% 100.0% 0.0% number of processed stacks 8217740 2.1% 97.9% 0.0% average stack size 1.0 383.9 0.0 marketing flops 144.582793E+12 ------------------------------------------------------------------------------- # multiplications 2507 max memory usage/rank 411.340800E+06 # max total images/rank 2 # max 3D layers 1 # MPI messages exchanged 3790584 MPI messages size (bytes): total size 4.296164E+12 min size 0.000000E+00 max size 11.799056E+06 average size 1.133378E+06 MPI breakdown and total messages size (bytes): size <= 128 23892 0 128 < size <= 8192 0 0 8192 < size <= 32768 118100 3862691840 32768 < size <= 131072 1497000 117745909760 131072 < size <= 4194304 1818160 1529661358080 4194304 < size <= 16777216 333432 2644950566544 16777216 < size 0 0 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3992 58131. MP_Allreduce 11058 879. MP_Sync 87 MP_Alltoall 1969 4394467. MP_SendRecv 18176 24736. MP_ISendRecv 18176 24736. MP_Wait 36332 MP_ISend 16020 128768. MP_IRecv 16020 128768. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.022 0.056 272.943 272.947 qs_mol_dyn_low 1 2.0 0.016 0.042 272.324 272.391 qs_forces 11 3.9 0.007 0.021 272.105 272.117 qs_energies 11 4.9 0.003 0.008 262.744 262.797 scf_env_do_scf 11 5.9 0.001 0.006 242.158 242.190 scf_env_do_scf_inner_loop 117 6.6 0.004 0.041 211.588 211.594 velocity_verlet 10 3.0 0.007 0.023 172.213 172.218 dbcsr_multiply_generic 2507 12.6 0.253 0.279 145.560 149.534 qs_scf_new_mos 117 7.6 0.001 0.002 130.468 132.309 qs_scf_loop_do_ot 117 8.6 0.001 0.002 130.467 132.308 ot_scf_mini 117 9.6 0.004 0.007 121.312 123.022 multiply_cannon 2507 13.6 0.338 0.428 110.109 117.608 multiply_cannon_loop 2507 14.6 0.602 0.678 103.397 113.452 multiply_cannon_multrec 30084 15.6 83.267 97.140 83.307 97.179 ot_mini 117 10.6 0.001 0.002 72.987 75.076 rebuild_ks_matrix 128 8.3 0.001 0.001 61.061 62.451 qs_ks_build_kohn_sham_matrix 128 9.3 0.022 0.030 61.060 62.450 mp_waitall_1 240928 16.6 24.561 57.355 24.561 57.355 qs_ks_update_qs_env 128 7.6 0.001 0.002 55.150 56.452 multiply_cannon_metrocomm3 30084 15.6 0.155 0.177 10.318 43.740 apply_preconditioner_dbcsr 128 12.6 0.001 0.001 37.158 39.521 apply_single 128 13.6 0.001 0.002 37.157 39.520 ot_diis_step 117 11.6 0.018 0.021 38.174 38.177 qs_ot_get_derivative 117 11.6 0.002 0.004 34.477 36.267 qs_rho_update_rho_low 128 7.7 0.001 0.001 33.891 34.195 calculate_rho_elec 128 8.7 0.075 0.081 33.891 34.194 sum_up_and_integrate 128 10.3 0.004 0.008 31.320 31.357 integrate_v_rspace 128 11.3 0.005 0.006 31.213 31.255 init_scf_loop 11 6.9 0.001 0.001 30.429 30.433 make_m2s 5014 13.6 0.160 0.175 24.908 25.855 grid_collocate_task_list 128 9.7 21.882 23.019 21.882 23.019 qs_ot_get_p 128 10.4 0.001 0.002 20.181 22.896 prepare_preconditioner 11 7.9 0.000 0.000 22.573 22.799 make_preconditioner 11 8.9 0.000 0.000 22.573 22.799 make_images 5014 14.6 1.352 1.713 21.126 22.297 grid_integrate_task_list 128 12.3 20.883 21.914 20.883 21.914 make_full_inverse_cholesky 11 9.9 0.000 0.001 21.184 21.590 qs_ot_get_derivative_diag 77 12.4 0.003 0.004 15.040 16.361 multiply_cannon_metrocomm4 27577 15.6 0.168 0.195 6.132 15.751 init_scf_run 11 5.9 0.000 0.008 15.480 15.481 scf_env_initial_rho_setup 11 6.9 0.000 0.009 15.480 15.481 mp_irecv_dv 74558 16.2 6.210 15.475 6.210 15.475 mp_sum_l 12367 13.3 7.060 15.019 7.060 15.019 make_images_data 5014 15.6 0.092 0.112 12.418 14.584 hybrid_alltoall_any 5200 16.5 0.463 2.840 10.652 14.148 wfi_extrapolate 11 7.9 0.002 0.005 13.842 13.842 qs_ot_p2m_diag 83 11.4 0.115 0.151 11.998 12.064 fft_wrap_pw1pw2 1291 11.7 0.029 0.042 11.519 11.766 cp_fm_cholesky_invert 11 10.9 11.505 11.533 11.505 11.533 cp_dbcsr_syevd 83 12.4 0.006 0.007 11.144 11.149 calculate_dm_sparse 128 9.5 0.001 0.002 10.124 11.093 density_rs2pw 128 9.7 0.009 0.010 9.571 10.568 fft_wrap_pw1pw2_140 523 12.2 0.531 0.563 10.267 10.555 fft3d_ps 1291 13.7 4.258 4.552 9.094 9.306 mp_sum_d 4469 12.1 4.475 8.404 4.475 8.404 dbcsr_dot_sd 1318 12.0 1.039 1.132 4.501 8.373 cp_dbcsr_sm_fm_multiply 37 9.5 0.003 0.004 8.194 8.222 mp_alltoall_d11v 2415 14.1 6.762 7.730 6.762 7.730 qs_ot_get_orbitals 117 10.6 0.001 0.002 7.350 7.552 cp_fm_diag_elpa 83 13.4 0.001 0.001 7.426 7.434 potential_pw2rs 128 12.3 0.018 0.020 7.361 7.423 cp_fm_diag_elpa_base 83 14.4 7.274 7.297 7.412 7.412 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 6.616 7.050 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.544 6.668 cp_fm_cholesky_decompose 22 10.9 6.318 6.377 6.318 6.377 qs_ot_get_derivative_taylor 40 13.0 0.002 0.002 5.569 6.066 multiply_cannon_metrocomm1 30084 15.6 0.099 0.109 2.277 5.774 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="402", plot="h2o_256_md", label="(4n/18r/2t)", y=272.947000, yerr=0.000000 PlotPoint: name="403", plot="h2o_256_md_mem", label="(4n/18r/2t)", y=388.909091, yerr=5.212826 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/23/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 3321888768 0.0% 100.0% 0.0% flops 22 x 32 x 32 4060086272 0.0% 100.0% 0.0% flops 128 x 64 x 64 15032385536 0.0% 100.0% 0.0% flops 64 x 64 x 64 37580963840 0.0% 100.0% 0.0% flops 128 x 96 x 64 45097156608 0.0% 100.0% 0.0% flops 128 x 64 x 96 45097156608 0.0% 100.0% 0.0% flops 96 x 64 x 64 67645734912 0.0% 100.0% 0.0% flops 128 x 64 x 862 102363561984 0.0% 100.0% 0.0% flops 64 x 96 x 64 112742891520 0.0% 100.0% 0.0% flops 64 x 64 x 96 112742891520 0.0% 100.0% 0.0% flops 876 x 64 x 64 128138084352 0.0% 100.0% 0.0% flops 911 x 64 x 64 133257756672 0.0% 100.0% 0.0% flops 128 x 96 x 96 135291469824 0.0% 100.0% 0.0% flops 929 x 64 x 64 135890731008 0.0% 100.0% 0.0% flops 964 x 64 x 64 141010403328 0.0% 100.0% 0.0% flops 128 x 64 x 849 201639591936 0.0% 100.0% 0.0% flops 96 x 64 x 96 202937204736 0.0% 100.0% 0.0% flops 96 x 96 x 64 202937204736 0.0% 100.0% 0.0% flops 720 x 64 x 64 210637946880 0.0% 100.0% 0.0% flops 64 x 64 x 862 255908904960 0.0% 100.0% 0.0% flops 128 x 96 x 862 307090685952 0.0% 100.0% 0.0% flops 64 x 96 x 96 338228674560 0.0% 100.0% 0.0% flops 9 x 9 x 64 352505530368 0.0% 100.0% 0.0% flops 876 x 64 x 96 384414253056 0.0% 100.0% 0.0% flops 876 x 96 x 64 384414253056 0.0% 100.0% 0.0% flops 876 x 64 x 862 392803221504 0.0% 100.0% 0.0% flops 911 x 96 x 64 399773270016 0.0% 100.0% 0.0% flops 911 x 64 x 96 399773270016 0.0% 100.0% 0.0% flops 929 x 96 x 64 407672193024 0.0% 100.0% 0.0% flops 929 x 64 x 96 407672193024 0.0% 100.0% 0.0% flops 911 x 64 x 862 408497414144 0.0% 100.0% 0.0% flops 929 x 64 x 862 416568713216 0.0% 100.0% 0.0% flops 964 x 64 x 96 423031209984 0.0% 100.0% 0.0% flops 964 x 96 x 64 423031209984 0.0% 100.0% 0.0% flops 964 x 64 x 862 432262905856 0.0% 100.0% 0.0% flops 96 x 64 x 862 460636028928 0.0% 100.0% 0.0% flops 22 x 9 x 64 489467860992 0.0% 100.0% 0.0% flops 9 x 22 x 64 490886212608 0.0% 100.0% 0.0% flops 64 x 64 x 849 504098979840 0.0% 100.0% 0.0% flops 128 x 96 x 849 604918775808 0.0% 100.0% 0.0% flops 96 x 96 x 96 608811614208 0.0% 100.0% 0.0% flops 720 x 96 x 64 631913840640 0.0% 100.0% 0.0% flops 720 x 64 x 96 631913840640 0.0% 100.0% 0.0% flops 720 x 64 x 862 645703925760 0.0% 100.0% 0.0% flops 22 x 22 x 64 678653927424 0.0% 100.0% 0.0% flops 64 x 96 x 862 767726714880 0.0% 100.0% 0.0% flops 876 x 64 x 849 773758550016 0.0% 100.0% 0.0% flops 911 x 64 x 849 804673560576 0.0% 100.0% 0.0% flops 929 x 64 x 849 820572708864 0.0% 100.0% 0.0% flops 964 x 64 x 849 851487719424 0.0% 100.0% 0.0% flops 96 x 64 x 849 907378163712 0.0% 100.0% 0.0% flops 9 x 9 x 96 1057516591104 0.0% 100.0% 0.0% flops 876 x 96 x 96 1153242759168 0.0% 100.0% 0.0% flops 876 x 96 x 862 1178409664512 0.0% 100.0% 0.0% flops 911 x 96 x 96 1199319810048 0.0% 100.0% 0.0% flops 929 x 96 x 96 1223016579072 0.0% 100.0% 0.0% flops 911 x 96 x 862 1225492242432 0.0% 100.0% 0.0% flops 929 x 96 x 862 1249706139648 0.0% 100.0% 0.0% flops 964 x 96 x 96 1269093629952 0.0% 100.0% 0.0% flops 720 x 64 x 849 1271931863040 0.0% 100.0% 0.0% flops 964 x 96 x 862 1296788717568 0.0% 100.0% 0.0% flops 96 x 96 x 862 1381908086784 0.0% 100.0% 0.0% flops 22 x 9 x 96 1468403582976 0.0% 100.0% 0.0% flops 9 x 22 x 96 1472658637824 0.0% 100.0% 0.0% flops 64 x 96 x 849 1512296939520 0.0% 100.0% 0.0% flops 720 x 96 x 96 1895741521920 0.0% 100.0% 0.0% flops 720 x 96 x 862 1937111777280 0.0% 100.0% 0.0% flops 22 x 22 x 96 2035961782272 0.0% 100.0% 0.0% flops 876 x 96 x 849 2321275650048 0.0% 100.0% 0.0% flops 911 x 96 x 849 2414020681728 0.0% 100.0% 0.0% flops 929 x 96 x 849 2461718126592 0.0% 100.0% 0.0% flops 964 x 96 x 849 2554463158272 0.0% 100.0% 0.0% flops 96 x 96 x 849 2722134491136 0.0% 100.0% 0.0% flops 720 x 96 x 849 3815795589120 0.0% 100.0% 0.0% flops 9 x 32 x 9 5921911627776 0.0% 100.0% 0.0% flops 22 x 32 x 9 8269110153216 0.0% 100.0% 0.0% flops 9 x 32 x 22 8269110153216 0.0% 100.0% 0.0% flops 22 x 32 x 22 11374757920768 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 92.796573E+12 0.0% 100.0% 0.0% flops max/rank 2.166472E+12 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 3069347940 0.0% 100.0% 0.0% number of processed stacks 8209012 0.0% 100.0% 0.0% average stack size 0.0 373.9 0.0 marketing flops 143.508480E+12 ------------------------------------------------------------------------------- # multiplications 2485 max memory usage/rank 522.334208E+06 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 2385600 MPI messages size (bytes): total size 4.069300E+12 min size 0.000000E+00 max size 17.653760E+06 average size 1.705776E+06 MPI breakdown and total messages size (bytes): size <= 128 14916 0 128 < size <= 8192 0 0 8192 < size <= 32768 70188 2295595008 32768 < size <= 131072 716032 54973693952 131072 < size <= 4194304 1363760 1386318135296 4194304 < size <= 16777216 153648 1453842923456 16777216 < size 67056 1171888537600 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 4007 58142. MP_Allreduce 11096 960. MP_Sync 86 MP_Alltoall 1955 5835736. MP_SendRecv 11938 47072. MP_ISendRecv 11938 47072. MP_Wait 25718 MP_ISend 11660 212488. MP_IRecv 11660 212488. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.027 0.061 246.141 246.157 qs_mol_dyn_low 1 2.0 0.023 0.080 245.597 245.623 qs_forces 11 3.9 0.009 0.025 245.318 245.340 qs_energies 11 4.9 0.001 0.002 237.247 237.323 scf_env_do_scf 11 5.9 0.001 0.001 218.513 218.540 scf_env_do_scf_inner_loop 116 6.6 0.003 0.010 186.572 186.587 velocity_verlet 10 3.0 0.015 0.057 156.463 156.477 dbcsr_multiply_generic 2485 12.5 0.227 0.302 133.144 136.663 qs_scf_new_mos 116 7.6 0.001 0.001 120.444 121.766 qs_scf_loop_do_ot 116 8.6 0.001 0.002 120.443 121.765 ot_scf_mini 116 9.6 0.004 0.005 112.315 114.227 multiply_cannon 2485 13.5 0.312 0.367 99.617 105.018 multiply_cannon_loop 2485 14.5 0.564 0.710 94.716 101.670 multiply_cannon_multrec 29820 15.5 74.184 91.009 74.219 91.046 ot_mini 116 10.6 0.001 0.001 65.771 67.815 rebuild_ks_matrix 127 8.3 0.001 0.001 50.831 52.457 qs_ks_build_kohn_sham_matrix 127 9.3 0.017 0.020 50.831 52.456 mp_waitall_1 212858 16.6 24.243 52.043 24.243 52.043 qs_ks_update_qs_env 127 7.6 0.001 0.002 45.888 47.368 multiply_cannon_metrocomm3 29820 15.5 0.128 0.246 12.402 40.282 apply_preconditioner_dbcsr 127 12.6 0.000 0.001 35.278 37.875 apply_single 127 13.6 0.001 0.001 35.277 37.875 ot_diis_step 116 11.6 0.021 0.023 36.207 36.210 init_scf_loop 11 6.9 0.000 0.000 31.820 31.823 qs_ot_get_derivative 116 11.6 0.002 0.002 29.269 31.215 qs_rho_update_rho_low 127 7.7 0.001 0.001 26.702 26.916 calculate_rho_elec 127 8.7 0.108 0.131 26.701 26.915 prepare_preconditioner 11 7.9 0.000 0.000 24.863 25.018 make_preconditioner 11 8.9 0.000 0.000 24.863 25.018 sum_up_and_integrate 127 10.3 0.003 0.005 24.826 24.922 integrate_v_rspace 127 11.3 0.004 0.005 24.757 24.886 make_m2s 4970 13.5 0.146 0.186 22.779 23.874 make_full_inverse_cholesky 11 9.9 0.000 0.001 23.130 23.765 qs_ot_get_p 127 10.4 0.001 0.002 19.900 22.651 make_images 4970 14.5 1.365 1.991 19.395 20.557 grid_collocate_task_list 127 9.7 16.288 17.596 16.288 17.596 grid_integrate_task_list 127 12.3 16.730 17.431 16.730 17.431 multiply_cannon_metrocomm4 27335 15.5 0.137 0.255 6.843 17.265 mp_irecv_dv 68888 16.3 6.656 17.020 6.656 17.020 qs_ot_get_derivative_diag 76 12.4 0.003 0.004 12.620 14.046 init_scf_run 11 5.9 0.000 0.002 13.995 13.997 scf_env_initial_rho_setup 11 6.9 0.001 0.002 13.995 13.997 mp_sum_l 12261 13.2 7.256 13.924 7.256 13.924 make_images_data 4970 15.5 0.082 0.135 11.796 13.045 wfi_extrapolate 11 7.9 0.001 0.001 12.476 12.477 hybrid_alltoall_any 5155 16.4 0.328 1.408 10.289 12.195 qs_ot_p2m_diag 82 11.4 0.161 0.193 12.111 12.168 cp_fm_cholesky_invert 11 10.9 11.872 11.899 11.872 11.899 cp_dbcsr_syevd 82 12.4 0.006 0.006 11.144 11.150 density_rs2pw 127 9.7 0.007 0.010 8.163 10.267 fft_wrap_pw1pw2 1281 11.7 0.020 0.031 9.611 10.084 calculate_dm_sparse 127 9.5 0.001 0.001 9.001 9.664 mp_sum_d 4458 12.1 4.754 9.063 4.754 9.063 fft3d_ps 1281 13.7 2.958 3.788 8.003 9.005 fft_wrap_pw1pw2_140 519 12.2 0.324 0.391 8.394 8.535 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.003 7.746 7.762 cp_fm_diag_elpa 82 13.4 0.001 0.001 7.450 7.457 mp_alltoall_d11v 2401 14.1 6.088 7.446 6.088 7.446 cp_fm_diag_elpa_base 82 14.4 7.210 7.250 7.435 7.435 dbcsr_dot_sd 1305 12.0 0.912 1.052 4.411 7.348 mp_alltoall_z22v 1281 15.7 4.381 6.795 4.381 6.795 cp_fm_cholesky_decompose 22 10.9 6.505 6.692 6.505 6.692 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 6.219 6.645 qs_ot_get_orbitals 116 10.6 0.001 0.001 6.260 6.452 mp_waitany 11660 13.9 3.369 6.295 3.369 6.295 transfer_rs2pw 519 10.6 0.007 0.009 3.574 5.983 potential_pw2rs 127 12.3 0.020 0.027 5.703 5.891 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 5.478 5.632 dbcsr_complete_redistribute 393 12.7 0.977 1.181 4.526 5.629 mp_allgather_i34 2485 14.5 2.021 5.581 2.021 5.581 make_images_sizes 4970 15.5 0.008 0.016 2.698 5.550 mp_alltoall_i44 4970 16.5 2.690 5.543 2.690 5.543 qs_ot_get_derivative_taylor 40 13.0 0.001 0.002 4.811 5.340 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="404", plot="h2o_256_md", label="(4n/12r/3t)", y=246.157000, yerr=0.000000 PlotPoint: name="405", plot="h2o_256_md_mem", label="(4n/12r/3t)", y=481.454545, yerr=0.890724 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/24/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 3321888768 0.0% 100.0% 0.0% flops 22 x 32 x 32 4060086272 0.0% 100.0% 0.0% flops 96 x 192 x 192 25367150592 0.0% 100.0% 0.0% flops 96 x 160 x 192 42278584320 0.0% 100.0% 0.0% flops 96 x 192 x 160 42278584320 0.0% 100.0% 0.0% flops 347 x 192 x 192 57102630912 0.0% 100.0% 0.0% flops 369 x 192 x 192 60722970624 0.0% 100.0% 0.0% flops 96 x 160 x 160 70464307200 0.0% 100.0% 0.0% flops 347 x 160 x 192 95171051520 0.0% 100.0% 0.0% flops 347 x 192 x 160 95171051520 0.0% 100.0% 0.0% flops 369 x 160 x 192 101204951040 0.0% 100.0% 0.0% flops 369 x 192 x 160 101204951040 0.0% 100.0% 0.0% flops 64 x 192 x 192 101468602368 0.0% 100.0% 0.0% flops 320 x 192 x 192 105318973440 0.0% 100.0% 0.0% flops 342 x 192 x 192 112559652864 0.0% 100.0% 0.0% flops 96 x 192 x 1702 113689460736 0.0% 100.0% 0.0% flops 96 x 192 x 1707 114023448576 0.0% 100.0% 0.0% flops 96 x 192 x 1711 114290638848 0.0% 100.0% 0.0% flops 347 x 192 x 1702 115208352768 0.0% 100.0% 0.0% flops 347 x 192 x 1707 115546802688 0.0% 100.0% 0.0% flops 347 x 192 x 1711 115817562624 0.0% 100.0% 0.0% flops 369 x 192 x 1702 122512628736 0.0% 100.0% 0.0% flops 369 x 192 x 1707 122872536576 0.0% 100.0% 0.0% flops 369 x 192 x 1711 123160462848 0.0% 100.0% 0.0% flops 32 x 192 x 192 143747186688 0.0% 100.0% 0.0% flops 440 x 192 x 192 144813588480 0.0% 100.0% 0.0% flops 462 x 192 x 192 152054267904 0.0% 100.0% 0.0% flops 471 x 192 x 192 155016364032 0.0% 100.0% 0.0% flops 347 x 160 x 160 158618419200 0.0% 100.0% 0.0% flops 369 x 160 x 160 168674918400 0.0% 100.0% 0.0% flops 64 x 192 x 160 169114337280 0.0% 100.0% 0.0% flops 64 x 160 x 192 169114337280 0.0% 100.0% 0.0% flops 320 x 192 x 160 175531622400 0.0% 100.0% 0.0% flops 320 x 160 x 192 175531622400 0.0% 100.0% 0.0% flops 342 x 160 x 192 187599421440 0.0% 100.0% 0.0% flops 342 x 192 x 160 187599421440 0.0% 100.0% 0.0% flops 96 x 160 x 1702 189482434560 0.0% 100.0% 0.0% flops 96 x 160 x 1707 190039080960 0.0% 100.0% 0.0% flops 96 x 160 x 1711 190484398080 0.0% 100.0% 0.0% flops 347 x 160 x 1702 192013921280 0.0% 100.0% 0.0% flops 347 x 160 x 1707 192578004480 0.0% 100.0% 0.0% flops 347 x 160 x 1711 193029271040 0.0% 100.0% 0.0% flops 369 x 160 x 1702 204187714560 0.0% 100.0% 0.0% flops 369 x 160 x 1707 204787560960 0.0% 100.0% 0.0% flops 369 x 160 x 1711 205267438080 0.0% 100.0% 0.0% flops 320 x 192 x 1702 212488028160 0.0% 100.0% 0.0% flops 320 x 192 x 1707 213112258560 0.0% 100.0% 0.0% flops 320 x 192 x 1711 213611642880 0.0% 100.0% 0.0% flops 342 x 192 x 1702 227096580096 0.0% 100.0% 0.0% flops 342 x 192 x 1707 227763726336 0.0% 100.0% 0.0% flops 342 x 192 x 1711 228297443328 0.0% 100.0% 0.0% flops 32 x 192 x 160 239578644480 0.0% 100.0% 0.0% flops 32 x 160 x 192 239578644480 0.0% 100.0% 0.0% flops 440 x 160 x 192 241355980800 0.0% 100.0% 0.0% flops 440 x 192 x 160 241355980800 0.0% 100.0% 0.0% flops 462 x 192 x 160 253423779840 0.0% 100.0% 0.0% flops 462 x 160 x 192 253423779840 0.0% 100.0% 0.0% flops 471 x 160 x 192 258360606720 0.0% 100.0% 0.0% flops 471 x 192 x 160 258360606720 0.0% 100.0% 0.0% flops 64 x 160 x 160 281857228800 0.0% 100.0% 0.0% flops 440 x 192 x 1702 292171038720 0.0% 100.0% 0.0% flops 320 x 160 x 160 292552704000 0.0% 100.0% 0.0% flops 440 x 192 x 1707 293029355520 0.0% 100.0% 0.0% flops 440 x 192 x 1711 293716008960 0.0% 100.0% 0.0% flops 462 x 192 x 1702 306779590656 0.0% 100.0% 0.0% flops 462 x 192 x 1707 307680823296 0.0% 100.0% 0.0% flops 462 x 192 x 1711 308401809408 0.0% 100.0% 0.0% flops 342 x 160 x 160 312665702400 0.0% 100.0% 0.0% flops 471 x 192 x 1702 312755816448 0.0% 100.0% 0.0% flops 471 x 192 x 1707 313674605568 0.0% 100.0% 0.0% flops 471 x 192 x 1711 314409636864 0.0% 100.0% 0.0% flops 320 x 160 x 1702 354146713600 0.0% 100.0% 0.0% flops 320 x 160 x 1707 355187097600 0.0% 100.0% 0.0% flops 320 x 160 x 1711 356019404800 0.0% 100.0% 0.0% flops 342 x 160 x 1702 378494300160 0.0% 100.0% 0.0% flops 342 x 160 x 1707 379606210560 0.0% 100.0% 0.0% flops 342 x 160 x 1711 380495738880 0.0% 100.0% 0.0% flops 32 x 160 x 160 399297740800 0.0% 100.0% 0.0% flops 440 x 160 x 160 402259968000 0.0% 100.0% 0.0% flops 462 x 160 x 160 422372966400 0.0% 100.0% 0.0% flops 471 x 160 x 160 430601011200 0.0% 100.0% 0.0% flops 64 x 192 x 1702 454757842944 0.0% 100.0% 0.0% flops 64 x 192 x 1707 456093794304 0.0% 100.0% 0.0% flops 64 x 192 x 1711 457162555392 0.0% 100.0% 0.0% flops 440 x 160 x 1702 486951731200 0.0% 100.0% 0.0% flops 440 x 160 x 1707 488382259200 0.0% 100.0% 0.0% flops 440 x 160 x 1711 489526681600 0.0% 100.0% 0.0% flops 462 x 160 x 1702 511299317760 0.0% 100.0% 0.0% flops 462 x 160 x 1707 512801372160 0.0% 100.0% 0.0% flops 462 x 160 x 1711 514003015680 0.0% 100.0% 0.0% flops 471 x 160 x 1702 521259694080 0.0% 100.0% 0.0% flops 471 x 160 x 1707 522791009280 0.0% 100.0% 0.0% flops 471 x 160 x 1711 524016061440 0.0% 100.0% 0.0% flops 9 x 9 x 192 528759166464 0.0% 100.0% 0.0% flops 32 x 192 x 1702 644240277504 0.0% 100.0% 0.0% flops 32 x 192 x 1707 646132875264 0.0% 100.0% 0.0% flops 32 x 192 x 1711 647646953472 0.0% 100.0% 0.0% flops 22 x 9 x 192 734201791488 0.0% 100.0% 0.0% flops 9 x 22 x 192 736329318912 0.0% 100.0% 0.0% flops 449 x 192 x 192 738878423040 0.0% 100.0% 0.0% flops 64 x 160 x 1702 757929738240 0.0% 100.0% 0.0% flops 64 x 160 x 1707 760156323840 0.0% 100.0% 0.0% flops 64 x 160 x 1711 761937592320 0.0% 100.0% 0.0% flops 9 x 9 x 160 881265277440 0.0% 100.0% 0.0% flops 22 x 22 x 192 1017980891136 0.0% 100.0% 0.0% flops 32 x 160 x 1702 1073733795840 0.0% 100.0% 0.0% flops 32 x 160 x 1707 1076888125440 0.0% 100.0% 0.0% flops 32 x 160 x 1711 1079411589120 0.0% 100.0% 0.0% flops 22 x 9 x 160 1223669652480 0.0% 100.0% 0.0% flops 9 x 22 x 160 1227215531520 0.0% 100.0% 0.0% flops 449 x 192 x 160 1231464038400 0.0% 100.0% 0.0% flops 449 x 160 x 192 1231464038400 0.0% 100.0% 0.0% flops 449 x 192 x 1702 1490736322560 0.0% 100.0% 0.0% flops 449 x 192 x 1707 1495115688960 0.0% 100.0% 0.0% flops 449 x 192 x 1711 1498619182080 0.0% 100.0% 0.0% flops 22 x 22 x 160 1696634818560 0.0% 100.0% 0.0% flops 449 x 160 x 160 2052440064000 0.0% 100.0% 0.0% flops 449 x 160 x 1702 2484560537600 0.0% 100.0% 0.0% flops 449 x 160 x 1707 2491859481600 0.0% 100.0% 0.0% flops 449 x 160 x 1711 2497698636800 0.0% 100.0% 0.0% flops 9 x 32 x 9 5921921912832 0.0% 100.0% 0.0% flops 9 x 32 x 22 8269110153216 0.0% 100.0% 0.0% flops 22 x 32 x 9 8269110153216 0.0% 100.0% 0.0% flops 22 x 32 x 22 11374757920768 0.0% 100.0% 0.0% flops inhomo. stacks 3843113926656 100.0% 0.0% 0.0% flops total 92.944115E+12 4.1% 95.9% 0.0% flops max/rank 3.293516E+12 11.2% 88.8% 0.0% matmuls inhomo. stacks 49320 100.0% 0.0% 0.0% matmuls total 2942138950 0.0% 100.0% 0.0% number of processed stacks 5574806 0.9% 99.1% 0.0% average stack size 1.0 532.5 0.0 marketing flops 143.511165E+12 ------------------------------------------------------------------------------- # multiplications 2485 max memory usage/rank 644.792320E+06 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 894600 MPI messages size (bytes): total size 2.201845E+12 min size 0.000000E+00 max size 23.420168E+06 average size 2.461262E+06 MPI breakdown and total messages size (bytes): size <= 128 5610 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 330 14417920 131072 < size <= 4194304 813420 1008310681600 4194304 < size <= 16777216 52380 660837893360 16777216 < size 22860 532676608000 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 125 12. MP_Allreduce 13451 37. MP_Alltoall 9584 367498. MP_ISend 59596 1072216. MP_IRecv 59596 1065100. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 4003 57961. MP_Allreduce 11085 1001. MP_Sync 86 MP_Alltoall 1700 8593923. MP_SendRecv 8890 54272. MP_ISendRecv 8890 54272. MP_Wait 20550 MP_ISend 9540 307877. MP_IRecv 9540 307877. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.021 0.044 258.308 258.315 qs_mol_dyn_low 1 2.0 0.007 0.015 257.843 257.869 qs_forces 11 3.9 0.004 0.005 257.699 257.713 qs_energies 11 4.9 0.002 0.003 248.176 248.261 scf_env_do_scf 11 5.9 0.001 0.004 228.025 228.041 scf_env_do_scf_inner_loop 116 6.6 0.003 0.014 198.022 198.026 velocity_verlet 10 3.0 0.001 0.001 163.302 163.309 dbcsr_multiply_generic 2485 12.5 0.236 0.296 130.240 133.368 qs_scf_new_mos 116 7.6 0.001 0.001 120.860 122.218 qs_scf_loop_do_ot 116 8.6 0.001 0.002 120.859 122.217 ot_scf_mini 116 9.6 0.004 0.005 111.915 113.229 multiply_cannon 2485 13.5 0.324 0.354 93.727 101.837 multiply_cannon_loop 2485 14.5 0.297 0.366 86.074 93.851 multiply_cannon_multrec 14910 15.5 73.125 82.775 73.160 82.807 ot_mini 116 10.6 0.001 0.001 66.176 67.553 rebuild_ks_matrix 127 8.3 0.001 0.001 55.166 57.156 qs_ks_build_kohn_sham_matrix 127 9.3 0.019 0.023 55.166 57.156 qs_ks_update_qs_env 127 7.6 0.001 0.001 49.894 51.795 mp_waitall_1 170050 16.6 19.894 35.740 19.894 35.740 qs_rho_update_rho_low 127 7.7 0.001 0.001 34.784 35.150 calculate_rho_elec 127 8.7 0.146 0.156 34.783 35.149 apply_preconditioner_dbcsr 127 12.6 0.000 0.001 31.854 34.529 apply_single 127 13.6 0.001 0.001 31.854 34.529 qs_ot_get_derivative 116 11.6 0.002 0.002 32.635 33.965 ot_diis_step 116 11.6 0.024 0.026 33.317 33.319 init_scf_loop 11 6.9 0.000 0.001 29.849 29.854 sum_up_and_integrate 127 10.3 0.004 0.007 29.285 29.365 integrate_v_rspace 127 11.3 0.004 0.005 29.181 29.261 make_m2s 4970 13.5 0.131 0.159 24.321 25.580 grid_collocate_task_list 127 9.7 23.951 24.534 23.951 24.534 qs_ot_get_p 127 10.4 0.001 0.002 21.232 22.877 prepare_preconditioner 11 7.9 0.000 0.000 22.578 22.754 make_preconditioner 11 8.9 0.000 0.000 22.578 22.754 make_images 4970 14.5 2.164 2.777 20.707 22.143 make_full_inverse_cholesky 11 9.9 0.000 0.000 20.924 21.237 grid_integrate_task_list 127 12.3 20.755 21.233 20.755 21.233 multiply_cannon_metrocomm3 14910 15.5 0.059 0.110 3.582 17.837 qs_ot_get_derivative_diag 76 12.4 0.003 0.004 14.318 15.319 init_scf_run 11 5.9 0.000 0.006 14.416 14.418 scf_env_initial_rho_setup 11 6.9 0.001 0.004 14.416 14.417 make_images_data 4970 15.5 0.076 0.113 12.346 14.351 hybrid_alltoall_any 5155 16.4 0.750 4.754 10.699 13.184 wfi_extrapolate 11 7.9 0.001 0.002 12.851 12.851 qs_ot_p2m_diag 82 11.4 0.213 0.277 12.373 12.486 mp_sum_l 12261 13.2 8.218 12.270 8.218 12.270 fft_wrap_pw1pw2 1281 11.7 0.025 0.039 11.241 11.422 cp_dbcsr_syevd 82 12.4 0.006 0.007 11.367 11.372 cp_fm_cholesky_invert 11 10.9 11.304 11.319 11.304 11.319 calculate_dm_sparse 127 9.5 0.001 0.002 9.929 10.996 mp_irecv_dv 36438 16.2 4.150 10.637 4.150 10.637 fft_wrap_pw1pw2_140 519 12.2 0.545 0.569 10.087 10.314 multiply_cannon_metrocomm4 12425 15.5 0.061 0.110 3.812 9.607 fft3d_ps 1281 13.7 4.386 4.687 8.819 8.975 density_rs2pw 127 9.7 0.008 0.012 8.306 8.861 multiply_cannon_metrocomm1 14910 15.5 0.076 0.133 4.541 7.683 cp_fm_diag_elpa 82 13.4 0.001 0.001 7.657 7.668 cp_fm_diag_elpa_base 82 14.4 7.505 7.535 7.647 7.647 cp_dbcsr_sm_fm_multiply 37 9.5 0.003 0.004 7.359 7.375 mp_alltoall_d11v 2401 14.1 6.366 6.902 6.366 6.902 cp_fm_cholesky_decompose 22 10.9 6.676 6.850 6.676 6.850 dbcsr_dot_sd 1305 12.0 1.283 1.378 4.581 6.830 mp_sum_d 4454 12.1 4.496 6.808 4.496 6.808 qs_ot_get_orbitals 116 10.6 0.001 0.002 6.054 6.253 potential_pw2rs 127 12.3 0.026 0.030 6.164 6.246 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.011 6.209 mp_allgather_i34 2485 14.5 2.578 6.166 2.578 6.166 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 5.661 6.158 qs_ot_get_derivative_taylor 40 13.0 0.001 0.002 5.412 5.745 dbcsr_complete_redistribute 393 12.7 1.340 1.423 4.769 5.193 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="406", plot="h2o_256_md", label="(4n/9r/4t)", y=258.315000, yerr=0.000000 PlotPoint: name="407", plot="h2o_256_md_mem", label="(4n/9r/4t)", y=611.545455, yerr=1.924183 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/25/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 3321888768 0.0% 100.0% 0.0% flops 22 x 32 x 32 4060086272 0.0% 100.0% 0.0% flops 96 x 192 x 64 17081303040 0.0% 100.0% 0.0% flops 96 x 160 x 64 28468838400 0.0% 100.0% 0.0% flops 267 x 192 x 64 29554311168 0.0% 100.0% 0.0% flops 289 x 192 x 64 31989497856 0.0% 100.0% 0.0% flops 311 x 192 x 64 34424684544 0.0% 100.0% 0.0% flops 342 x 192 x 64 37856083968 0.0% 100.0% 0.0% flops 409 x 192 x 64 45272334336 0.0% 100.0% 0.0% flops 418 x 192 x 64 46268547072 0.0% 100.0% 0.0% flops 427 x 192 x 64 47264759808 0.0% 100.0% 0.0% flops 436 x 192 x 64 48260972544 0.0% 100.0% 0.0% flops 267 x 160 x 64 49257185280 0.0% 100.0% 0.0% flops 96 x 192 x 96 51243909120 0.0% 100.0% 0.0% flops 289 x 160 x 64 53315829760 0.0% 100.0% 0.0% flops 64 x 192 x 64 56937676800 0.0% 100.0% 0.0% flops 311 x 160 x 64 57374474240 0.0% 100.0% 0.0% flops 342 x 160 x 64 63093473280 0.0% 100.0% 0.0% flops 409 x 160 x 64 75453890560 0.0% 100.0% 0.0% flops 418 x 160 x 64 77114245120 0.0% 100.0% 0.0% flops 427 x 160 x 64 78774599680 0.0% 100.0% 0.0% flops 436 x 160 x 64 80434954240 0.0% 100.0% 0.0% flops 96 x 160 x 96 85406515200 0.0% 100.0% 0.0% flops 267 x 192 x 96 88662933504 0.0% 100.0% 0.0% flops 267 x 192 x 862 90500235264 0.0% 100.0% 0.0% flops 32 x 192 x 64 91100282880 0.0% 100.0% 0.0% flops 64 x 160 x 64 94896128000 0.0% 100.0% 0.0% flops 289 x 192 x 96 95968493568 0.0% 100.0% 0.0% flops 289 x 192 x 862 97957183488 0.0% 100.0% 0.0% flops 453 x 192 x 64 100285415424 0.0% 100.0% 0.0% flops 311 x 192 x 96 103274053632 0.0% 100.0% 0.0% flops 311 x 192 x 862 105414131712 0.0% 100.0% 0.0% flops 342 x 192 x 96 113568251904 0.0% 100.0% 0.0% flops 342 x 192 x 862 115921649664 0.0% 100.0% 0.0% flops 96 x 192 x 862 116175863808 0.0% 100.0% 0.0% flops 409 x 192 x 96 135817003008 0.0% 100.0% 0.0% flops 409 x 192 x 862 138631446528 0.0% 100.0% 0.0% flops 418 x 192 x 96 138805641216 0.0% 100.0% 0.0% flops 418 x 192 x 862 141682016256 0.0% 100.0% 0.0% flops 427 x 192 x 96 141794279424 0.0% 100.0% 0.0% flops 427 x 192 x 862 144732585984 0.0% 100.0% 0.0% flops 436 x 192 x 96 144782917632 0.0% 100.0% 0.0% flops 440 x 192 x 64 146111201280 0.0% 100.0% 0.0% flops 267 x 160 x 96 147771555840 0.0% 100.0% 0.0% flops 436 x 192 x 862 147783155712 0.0% 100.0% 0.0% flops 267 x 160 x 862 150833725440 0.0% 100.0% 0.0% flops 32 x 160 x 64 151833804800 0.0% 100.0% 0.0% flops 289 x 160 x 96 159947489280 0.0% 100.0% 0.0% flops 289 x 160 x 862 163261972480 0.0% 100.0% 0.0% flops 453 x 160 x 64 167142359040 0.0% 100.0% 0.0% flops 64 x 192 x 96 170813030400 0.0% 100.0% 0.0% flops 311 x 160 x 96 172123422720 0.0% 100.0% 0.0% flops 311 x 160 x 862 175690219520 0.0% 100.0% 0.0% flops 267 x 192 x 849 178270765056 0.0% 100.0% 0.0% flops 342 x 160 x 96 189280419840 0.0% 100.0% 0.0% flops 289 x 192 x 849 192959741952 0.0% 100.0% 0.0% flops 342 x 160 x 862 193202749440 0.0% 100.0% 0.0% flops 96 x 160 x 862 193626439680 0.0% 100.0% 0.0% flops 311 x 192 x 849 207648718848 0.0% 100.0% 0.0% flops 409 x 160 x 96 226361671680 0.0% 100.0% 0.0% flops 342 x 192 x 849 228346822656 0.0% 100.0% 0.0% flops 96 x 192 x 849 228847583232 0.0% 100.0% 0.0% flops 409 x 160 x 862 231052410880 0.0% 100.0% 0.0% flops 418 x 160 x 96 231342735360 0.0% 100.0% 0.0% flops 418 x 160 x 862 236136693760 0.0% 100.0% 0.0% flops 427 x 160 x 96 236323799040 0.0% 100.0% 0.0% flops 427 x 160 x 862 241220976640 0.0% 100.0% 0.0% flops 436 x 160 x 96 241304862720 0.0% 100.0% 0.0% flops 440 x 160 x 64 243518668800 0.0% 100.0% 0.0% flops 436 x 160 x 862 246305259520 0.0% 100.0% 0.0% flops 409 x 192 x 849 273081434112 0.0% 100.0% 0.0% flops 32 x 192 x 96 273300848640 0.0% 100.0% 0.0% flops 418 x 192 x 849 279090561024 0.0% 100.0% 0.0% flops 64 x 160 x 96 284688384000 0.0% 100.0% 0.0% flops 427 x 192 x 849 285099687936 0.0% 100.0% 0.0% flops 436 x 192 x 849 291108814848 0.0% 100.0% 0.0% flops 267 x 160 x 849 297117941760 0.0% 100.0% 0.0% flops 453 x 192 x 96 300856246272 0.0% 100.0% 0.0% flops 453 x 192 x 862 307090685952 0.0% 100.0% 0.0% flops 289 x 160 x 849 321599569920 0.0% 100.0% 0.0% flops 311 x 160 x 849 346081198080 0.0% 100.0% 0.0% flops 9 x 9 x 64 355059998208 0.0% 100.0% 0.0% flops 342 x 160 x 849 380578037760 0.0% 100.0% 0.0% flops 96 x 160 x 849 381412638720 0.0% 100.0% 0.0% flops 64 x 192 x 862 387252879360 0.0% 100.0% 0.0% flops 440 x 192 x 96 438333603840 0.0% 100.0% 0.0% flops 440 x 192 x 862 447416893440 0.0% 100.0% 0.0% flops 409 x 160 x 849 455135723520 0.0% 100.0% 0.0% flops 32 x 160 x 96 455501414400 0.0% 100.0% 0.0% flops 418 x 160 x 849 465150935040 0.0% 100.0% 0.0% flops 427 x 160 x 849 475166146560 0.0% 100.0% 0.0% flops 436 x 160 x 849 485181358080 0.0% 100.0% 0.0% flops 22 x 9 x 64 493014297600 0.0% 100.0% 0.0% flops 9 x 22 x 64 494442584064 0.0% 100.0% 0.0% flops 453 x 160 x 96 501427077120 0.0% 100.0% 0.0% flops 453 x 160 x 862 511817809920 0.0% 100.0% 0.0% flops 453 x 192 x 849 604918775808 0.0% 100.0% 0.0% flops 32 x 192 x 862 619604606976 0.0% 100.0% 0.0% flops 64 x 160 x 862 645421465600 0.0% 100.0% 0.0% flops 22 x 22 x 64 683571924992 0.0% 100.0% 0.0% flops 440 x 160 x 96 730556006400 0.0% 100.0% 0.0% flops 440 x 160 x 862 745694822400 0.0% 100.0% 0.0% flops 64 x 192 x 849 762825277440 0.0% 100.0% 0.0% flops 440 x 192 x 849 881338613760 0.0% 100.0% 0.0% flops 453 x 160 x 849 1008197959680 0.0% 100.0% 0.0% flops 32 x 160 x 862 1032674344960 0.0% 100.0% 0.0% flops 9 x 9 x 96 1065179994624 0.0% 100.0% 0.0% flops 32 x 192 x 849 1220520443904 0.0% 100.0% 0.0% flops 64 x 160 x 849 1271375462400 0.0% 100.0% 0.0% flops 440 x 160 x 849 1468897689600 0.0% 100.0% 0.0% flops 22 x 9 x 96 1479042892800 0.0% 100.0% 0.0% flops 9 x 22 x 96 1483327752192 0.0% 100.0% 0.0% flops 32 x 160 x 849 2034200739840 0.0% 100.0% 0.0% flops 22 x 22 x 96 2050715774976 0.0% 100.0% 0.0% flops 9 x 32 x 9 5962613575680 0.0% 100.0% 0.0% flops 22 x 32 x 9 8325932617728 0.0% 100.0% 0.0% flops 9 x 32 x 22 8325932617728 0.0% 100.0% 0.0% flops 22 x 32 x 22 11452938371072 0.0% 100.0% 0.0% flops inhomo. stacks 22380222873600 100.0% 0.0% 0.0% flops total 95.022974E+12 23.6% 76.4% 0.0% flops max/rank 5.030957E+12 27.1% 72.9% 0.0% matmuls inhomo. stacks 645336 100.0% 0.0% 0.0% matmuls total 3090444320 0.0% 100.0% 0.0% number of processed stacks 8282084 7.8% 92.2% 0.0% average stack size 1.0 404.6 0.0 marketing flops 144.580175E+12 ------------------------------------------------------------------------------- # multiplications 2507 max memory usage/rank 850.477056E+06 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 1143192 MPI messages size (bytes): total size 2.023815E+12 min size 0.000000E+00 max size 17.653760E+06 average size 1.770319E+06 MPI breakdown and total messages size (bytes): size <= 128 6996 0 128 < size <= 8192 0 0 8192 < size <= 32768 396 8650752 32768 < size <= 131072 319024 36042702848 131072 < size <= 4194304 715736 785529176064 4194304 < size <= 16777216 70320 665379241840 16777216 < size 30720 536870912000 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 3994 58319. MP_Allreduce 11062 1083. MP_Sync 87 MP_Alltoall 1712 12503084. MP_SendRecv 5888 75008. MP_ISendRecv 5888 75008. MP_Wait 22442 MP_ISend 14952 244818. MP_IRecv 14952 244818. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.019 0.047 315.923 315.928 qs_mol_dyn_low 1 2.0 0.046 0.102 315.436 315.456 qs_forces 11 3.9 0.004 0.005 315.020 315.032 qs_energies 11 4.9 0.002 0.002 305.022 305.047 scf_env_do_scf 11 5.9 0.001 0.004 281.764 281.785 scf_env_do_scf_inner_loop 117 6.6 0.004 0.031 225.095 225.101 velocity_verlet 10 3.0 0.009 0.033 206.745 206.763 dbcsr_multiply_generic 2507 12.6 0.266 0.318 156.184 159.583 qs_scf_new_mos 117 7.6 0.001 0.002 141.407 142.623 qs_scf_loop_do_ot 117 8.6 0.001 0.002 141.405 142.621 ot_scf_mini 117 9.6 0.004 0.005 131.991 133.215 multiply_cannon 2507 13.6 0.352 0.390 102.940 114.378 multiply_cannon_loop 2507 14.6 0.524 0.657 95.938 106.312 multiply_cannon_multrec 30084 15.6 85.481 97.684 85.522 97.723 ot_mini 117 10.6 0.001 0.001 80.238 81.664 rebuild_ks_matrix 128 8.3 0.001 0.001 61.340 63.241 qs_ks_build_kohn_sham_matrix 128 9.3 0.020 0.022 61.340 63.240 qs_ks_update_qs_env 128 7.6 0.001 0.001 55.750 57.516 init_scf_loop 11 6.9 0.000 0.001 56.488 56.493 prepare_preconditioner 11 7.9 0.000 0.000 48.244 48.421 make_preconditioner 11 8.9 0.000 0.000 48.243 48.421 make_full_inverse_cholesky 11 9.9 0.022 0.031 39.733 46.399 apply_preconditioner_dbcsr 128 12.6 0.000 0.001 43.510 45.455 apply_single 128 13.6 0.001 0.001 43.509 45.455 ot_diis_step 117 11.6 0.027 0.029 43.580 43.582 make_m2s 5014 13.6 0.160 0.198 38.220 39.443 qs_ot_get_derivative 117 11.6 0.002 0.002 36.423 37.728 qs_rho_update_rho_low 128 7.7 0.001 0.001 36.271 36.411 calculate_rho_elec 128 8.7 0.213 0.229 36.270 36.410 make_images 5014 14.6 3.132 3.775 31.654 32.579 sum_up_and_integrate 128 10.3 0.003 0.007 30.231 30.281 integrate_v_rspace 128 11.3 0.004 0.005 30.123 30.177 mp_waitall_1 147882 16.7 20.846 27.328 20.846 27.328 grid_collocate_task_list 128 9.7 24.901 25.351 24.901 25.351 qs_ot_get_p 128 10.4 0.001 0.002 22.560 24.270 cp_fm_upper_to_full 105 14.8 15.403 22.284 15.403 22.284 make_images_data 5014 15.6 0.088 0.136 19.560 21.943 grid_integrate_task_list 128 12.3 21.426 21.879 21.426 21.879 hybrid_alltoall_any 5200 16.5 0.690 2.437 18.085 20.216 dbcsr_complete_redistribute 395 12.7 1.837 1.962 12.430 17.545 qs_ot_get_derivative_diag 77 12.4 0.003 0.004 16.294 17.144 init_scf_run 11 5.9 0.000 0.001 16.645 16.647 scf_env_initial_rho_setup 11 6.9 0.000 0.001 16.645 16.647 copy_fm_to_dbcsr 209 11.7 0.002 0.003 10.338 15.458 multiply_cannon_metrocomm4 25070 15.6 0.116 0.216 6.123 15.273 mp_irecv_dv 76098 16.2 5.991 15.126 5.991 15.126 wfi_extrapolate 11 7.9 0.001 0.002 14.963 14.964 cp_fm_cholesky_invert 11 10.9 14.863 14.883 14.863 14.883 qs_ot_p2m_diag 83 11.4 0.308 0.349 13.731 13.819 transfer_fm_to_dbcsr 11 9.9 0.032 0.045 8.475 13.506 mp_sum_l 12367 13.3 7.983 13.453 7.983 13.453 mp_alltoall_i22 716 14.1 8.113 13.313 8.113 13.313 cp_dbcsr_syevd 83 12.4 0.006 0.007 12.487 12.504 fft_wrap_pw1pw2 1291 11.7 0.023 0.031 11.954 11.998 calculate_dm_sparse 128 9.5 0.001 0.002 10.492 10.906 fft_wrap_pw1pw2_140 523 12.2 0.580 0.598 10.779 10.849 fft3d_ps 1291 13.7 4.558 4.683 9.320 9.389 density_rs2pw 128 9.7 0.009 0.013 8.704 9.308 cp_dbcsr_sm_fm_multiply 37 9.5 0.003 0.004 9.150 9.217 cp_fm_diag_elpa 83 13.4 0.001 0.001 8.902 8.911 cp_fm_diag_elpa_base 83 14.4 8.042 8.309 8.892 8.892 multiply_cannon_metrocomm3 30084 15.6 0.077 0.142 3.048 8.054 mp_alltoall_d11v 2415 14.1 7.461 7.844 7.461 7.844 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 6.946 7.626 cp_fm_cholesky_decompose 22 10.9 7.373 7.488 7.373 7.488 dbcsr_make_dense_low 13013 15.7 0.131 0.189 6.662 7.273 qs_ot_get_orbitals 117 10.6 0.001 0.001 6.937 7.076 dbcsr_dot_sd 1318 12.0 1.672 1.784 4.198 7.028 make_dense_data 13013 16.7 6.127 6.559 6.473 6.969 dbcsr_make_images_dense 4384 14.8 0.092 0.131 6.145 6.798 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.424 6.572 qs_ot_get_derivative_taylor 40 13.0 0.001 0.002 5.953 6.391 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="408", plot="h2o_256_md", label="(4n/6r/6t)", y=315.928000, yerr=0.000000 PlotPoint: name="409", plot="h2o_256_md_mem", label="(4n/6r/6t)", y=766.545455, yerr=15.905194 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/26/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 3321888768 0.0% 100.0% 0.0% flops 22 x 32 x 32 4060086272 0.0% 100.0% 0.0% flops 160 x 256 x 256 190589173760 0.0% 100.0% 0.0% flops 182 x 256 x 256 216795185152 0.0% 100.0% 0.0% flops 267 x 256 x 256 318045683712 0.0% 100.0% 0.0% flops 169 x 256 x 256 402619629568 0.0% 100.0% 0.0% flops 160 x 256 x 2560 432852172800 0.0% 100.0% 0.0% flops 182 x 256 x 2560 492369346560 0.0% 100.0% 0.0% flops 64 x 256 x 256 613375016960 0.0% 100.0% 0.0% flops 302 x 256 x 256 719474130944 0.0% 100.0% 0.0% flops 267 x 256 x 2560 722322063360 0.0% 100.0% 0.0% flops 324 x 256 x 256 771886153728 0.0% 100.0% 0.0% flops 169 x 256 x 2560 914400215040 0.0% 100.0% 0.0% flops 32 x 256 x 256 1349425037312 0.0% 100.0% 0.0% flops 9 x 9 x 256 1430457200640 0.0% 100.0% 0.0% flops 302 x 256 x 2560 1634016952320 0.0% 100.0% 0.0% flops 324 x 256 x 2560 1753051299840 0.0% 100.0% 0.0% flops 22 x 9 x 256 1986252263424 0.0% 100.0% 0.0% flops 9 x 22 x 256 1992003932160 0.0% 100.0% 0.0% flops 311 x 256 x 256 2222746238976 0.0% 100.0% 0.0% flops 22 x 22 x 256 2753958699008 0.0% 100.0% 0.0% flops 64 x 256 x 2560 3093718630400 0.0% 100.0% 0.0% flops 289 x 256 x 256 3442516951040 0.0% 100.0% 0.0% flops 311 x 256 x 2560 5048138465280 0.0% 100.0% 0.0% flops 9 x 32 x 9 6003313201152 0.0% 100.0% 0.0% flops 32 x 256 x 2560 6806180986880 0.0% 100.0% 0.0% flops 289 x 256 x 2560 7818392371200 0.0% 100.0% 0.0% flops 9 x 32 x 22 8382789550080 0.0% 100.0% 0.0% flops 22 x 32 x 9 8382789550080 0.0% 100.0% 0.0% flops 22 x 32 x 22 11531114856448 0.0% 100.0% 0.0% flops inhomo. stacks 13914431553536 100.0% 0.0% 0.0% flops total 95.347408E+12 14.6% 85.4% 0.0% flops max/rank 7.230276E+12 22.4% 77.6% 0.0% matmuls inhomo. stacks 122672 100.0% 0.0% 0.0% matmuls total 2939461244 0.0% 100.0% 0.0% number of processed stacks 4568676 2.7% 97.3% 0.0% average stack size 1.0 661.1 0.0 marketing flops 145.650931E+12 ------------------------------------------------------------------------------- # multiplications 2529 max memory usage/rank 1.189560E+09 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 242784 MPI messages size (bytes): total size 1.341806E+12 min size 0.000000E+00 max size 52.428800E+06 average size 5.526748E+06 MPI breakdown and total messages size (bytes): size <= 128 1452 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 132 8650752 131072 < size <= 4194304 115008 60297314304 4194304 < size <= 16777216 105840 554906419200 16777216 < size 20352 726592338480 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 31 12. MP_Allreduce 13501 37. MP_Alltoall 9724 750341. MP_ISend 40420 2093556. MP_IRecv 40420 2092660. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 4043 57624. MP_Allreduce 11184 1163. MP_Sync 88 MP_Alltoall 1724 18848050. MP_SendRecv 3870 122880. MP_ISendRecv 3870 122880. MP_Wait 16244 MP_ISend 10760 423501. MP_IRecv 10760 423501. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.020 0.044 311.435 311.446 qs_mol_dyn_low 1 2.0 0.008 0.022 310.922 310.957 qs_forces 11 3.9 0.004 0.005 310.724 310.738 qs_energies 11 4.9 0.002 0.002 300.808 300.844 scf_env_do_scf 11 5.9 0.001 0.002 276.188 276.206 scf_env_do_scf_inner_loop 118 6.6 0.004 0.021 234.155 234.176 velocity_verlet 10 3.0 0.001 0.001 200.170 200.182 dbcsr_multiply_generic 2529 12.6 0.274 0.343 164.688 167.573 qs_scf_new_mos 118 7.6 0.001 0.001 149.089 150.767 qs_scf_loop_do_ot 118 8.6 0.001 0.002 149.088 150.766 ot_scf_mini 118 9.6 0.004 0.005 138.959 140.938 multiply_cannon 2529 13.6 0.362 0.420 113.900 120.778 multiply_cannon_loop 2529 14.6 0.350 0.487 102.849 108.487 multiply_cannon_multrec 10116 15.6 83.727 94.853 83.770 94.894 ot_mini 118 10.6 0.001 0.001 83.704 85.708 rebuild_ks_matrix 129 8.3 0.001 0.001 62.459 64.251 qs_ks_build_kohn_sham_matrix 129 9.3 0.020 0.022 62.458 64.250 qs_ks_update_qs_env 129 7.6 0.001 0.001 57.147 58.793 apply_preconditioner_dbcsr 129 12.6 0.000 0.001 47.236 49.329 apply_single 129 13.6 0.001 0.001 47.236 49.329 ot_diis_step 118 11.6 0.032 0.040 47.491 47.493 make_m2s 5058 13.6 0.131 0.168 36.822 43.716 init_scf_loop 11 6.9 0.000 0.001 41.820 41.827 mp_waitall_1 126876 16.7 31.048 40.780 31.048 40.780 qs_ot_get_derivative 118 11.6 0.002 0.002 36.109 38.122 qs_rho_update_rho_low 129 7.7 0.001 0.001 36.325 36.552 calculate_rho_elec 129 8.7 0.318 0.329 36.324 36.552 make_images 5058 14.6 3.542 4.772 29.540 35.364 prepare_preconditioner 11 7.9 0.000 0.000 33.296 33.413 make_preconditioner 11 8.9 0.000 0.000 33.296 33.413 make_full_inverse_cholesky 11 9.9 0.019 0.038 30.517 30.919 sum_up_and_integrate 129 10.3 0.002 0.003 29.299 29.340 integrate_v_rspace 129 11.3 0.004 0.005 29.198 29.241 qs_ot_get_p 129 10.4 0.001 0.002 24.238 26.574 make_images_data 5058 15.6 0.077 0.111 18.695 25.216 grid_collocate_task_list 129 9.7 23.419 24.799 23.419 24.799 hybrid_alltoall_any 5245 16.5 1.115 4.978 17.926 23.402 multiply_cannon_metrocomm1 10116 15.6 0.049 0.081 11.318 21.612 grid_integrate_task_list 129 12.3 20.032 20.677 20.032 20.677 qs_ot_get_derivative_diag 78 12.4 0.003 0.004 16.207 17.732 cp_fm_cholesky_invert 11 10.9 17.360 17.376 17.360 17.376 init_scf_run 11 5.9 0.000 0.002 17.176 17.177 scf_env_initial_rho_setup 11 6.9 0.000 0.001 17.176 17.177 mp_allgather_i34 2529 14.6 7.830 17.026 7.830 17.026 wfi_extrapolate 11 7.9 0.001 0.001 15.408 15.408 qs_ot_p2m_diag 84 11.4 0.435 0.447 15.034 15.063 cp_dbcsr_syevd 84 12.4 0.006 0.007 13.700 13.703 fft_wrap_pw1pw2 1301 11.7 0.022 0.026 13.152 13.437 density_rs2pw 129 9.7 0.008 0.010 10.044 13.148 fft_wrap_pw1pw2_140 527 12.2 0.719 0.772 11.678 12.122 calculate_dm_sparse 129 9.5 0.001 0.002 11.288 11.921 multiply_cannon_metrocomm4 7587 15.6 0.038 0.074 5.080 11.785 mp_irecv_dv 29102 15.9 5.057 11.768 5.057 11.768 mp_sum_l 12473 13.3 6.628 10.486 6.628 10.486 fft3d_ps 1301 13.7 4.926 5.067 10.158 10.288 cp_fm_diag_elpa 84 13.4 0.001 0.001 10.152 10.155 cp_fm_diag_elpa_base 84 14.4 9.773 9.897 10.143 10.143 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.003 9.710 9.787 dbcsr_make_dense_low 6496 15.6 0.079 0.109 7.719 9.365 mp_alltoall_d11v 2429 14.1 8.056 9.247 8.056 9.247 make_dense_data 6496 16.6 7.397 8.916 7.609 9.223 dbcsr_make_images_dense 4424 14.8 0.048 0.066 6.906 8.670 dbcsr_data_release 190888 16.1 4.760 8.470 4.760 8.470 cp_fm_cholesky_decompose 22 10.9 8.046 8.250 8.046 8.250 multiply_cannon_metrocomm3 10116 15.6 0.038 0.070 2.138 7.983 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 7.540 7.954 dbcsr_complete_redistribute 397 12.7 2.569 2.655 7.497 7.893 dbcsr_dot_sd 1331 12.0 2.333 2.366 4.749 7.286 qs_ot_get_orbitals 118 10.6 0.001 0.002 6.905 6.952 dbcsr_destroy 27311 14.2 0.082 0.142 3.674 6.699 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 6.446 6.586 copy_dbcsr_to_fm 187 11.8 0.004 0.006 5.737 6.375 mp_waitany 10760 13.9 3.425 6.344 3.425 6.344 qs_ot_get_derivative_taylor 40 13.0 0.002 0.002 5.875 6.319 transfer_rs2pw 527 10.6 0.007 0.009 3.394 6.300 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="410", plot="h2o_256_md", label="(4n/4r/9t)", y=311.446000, yerr=0.000000 PlotPoint: name="411", plot="h2o_256_md_mem", label="(4n/4r/9t)", y=1075.000000, yerr=15.920256 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/27/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 3321888768 0.0% 100.0% 0.0% flops 22 x 32 x 32 4060086272 0.0% 100.0% 0.0% flops 32 x 192 x 192 120758206464 0.0% 100.0% 0.0% flops 462 x 192 x 192 154779254784 0.0% 100.0% 0.0% flops 64 x 192 x 192 155260551168 0.0% 100.0% 0.0% flops 462 x 192 x 1702 155805382656 0.0% 100.0% 0.0% flops 32 x 192 x 160 201263677440 0.0% 100.0% 0.0% flops 32 x 160 x 192 201263677440 0.0% 100.0% 0.0% flops 440 x 192 x 192 221113221120 0.0% 100.0% 0.0% flops 440 x 192 x 1702 222579118080 0.0% 100.0% 0.0% flops 462 x 160 x 192 257965424640 0.0% 100.0% 0.0% flops 462 x 192 x 160 257965424640 0.0% 100.0% 0.0% flops 64 x 192 x 160 258767585280 0.0% 100.0% 0.0% flops 64 x 160 x 192 258767585280 0.0% 100.0% 0.0% flops 462 x 160 x 1702 259675637760 0.0% 100.0% 0.0% flops 32 x 192 x 1702 269960183808 0.0% 100.0% 0.0% flops 453 x 192 x 192 303528148992 0.0% 100.0% 0.0% flops 453 x 192 x 1702 305540425728 0.0% 100.0% 0.0% flops 462 x 192 x 1698 310878425088 0.0% 100.0% 0.0% flops 462 x 192 x 1711 313258530816 0.0% 100.0% 0.0% flops 32 x 160 x 160 335439462400 0.0% 100.0% 0.0% flops 64 x 192 x 1702 347091664896 0.0% 100.0% 0.0% flops 440 x 192 x 160 368522035200 0.0% 100.0% 0.0% flops 440 x 160 x 192 368522035200 0.0% 100.0% 0.0% flops 440 x 160 x 1702 370965196800 0.0% 100.0% 0.0% flops 449 x 192 x 192 376059985920 0.0% 100.0% 0.0% flops 449 x 192 x 1702 378553121280 0.0% 100.0% 0.0% flops 462 x 160 x 160 429942374400 0.0% 100.0% 0.0% flops 64 x 160 x 160 431279308800 0.0% 100.0% 0.0% flops 440 x 192 x 1698 444112035840 0.0% 100.0% 0.0% flops 440 x 192 x 1711 447512186880 0.0% 100.0% 0.0% flops 32 x 160 x 1702 449933639680 0.0% 100.0% 0.0% flops 453 x 192 x 160 505880248320 0.0% 100.0% 0.0% flops 453 x 160 x 192 505880248320 0.0% 100.0% 0.0% flops 453 x 160 x 1702 509234042880 0.0% 100.0% 0.0% flops 462 x 160 x 1698 518130708480 0.0% 100.0% 0.0% flops 462 x 160 x 1711 522097551360 0.0% 100.0% 0.0% flops 9 x 9 x 192 536421450240 0.0% 100.0% 0.0% flops 32 x 192 x 1698 538651459584 0.0% 100.0% 0.0% flops 32 x 192 x 1711 542775410688 0.0% 100.0% 0.0% flops 64 x 160 x 1702 578486108160 0.0% 100.0% 0.0% flops 453 x 192 x 1698 609644703744 0.0% 100.0% 0.0% flops 440 x 160 x 160 614203392000 0.0% 100.0% 0.0% flops 453 x 192 x 1711 614312183808 0.0% 100.0% 0.0% flops 449 x 192 x 160 626766643200 0.0% 100.0% 0.0% flops 449 x 160 x 192 626766643200 0.0% 100.0% 0.0% flops 449 x 160 x 1702 630921868800 0.0% 100.0% 0.0% flops 64 x 192 x 1698 692551876608 0.0% 100.0% 0.0% flops 64 x 192 x 1711 697854099456 0.0% 100.0% 0.0% flops 440 x 160 x 1698 740186726400 0.0% 100.0% 0.0% flops 22 x 9 x 192 744844598784 0.0% 100.0% 0.0% flops 440 x 160 x 1711 745853644800 0.0% 100.0% 0.0% flops 9 x 22 x 192 747001474560 0.0% 100.0% 0.0% flops 449 x 192 x 1698 755326909440 0.0% 100.0% 0.0% flops 449 x 192 x 1711 761109742080 0.0% 100.0% 0.0% flops 453 x 160 x 160 843133747200 0.0% 100.0% 0.0% flops 9 x 9 x 160 894035750400 0.0% 100.0% 0.0% flops 32 x 160 x 1698 897752432640 0.0% 100.0% 0.0% flops 32 x 160 x 1711 904625684480 0.0% 100.0% 0.0% flops 453 x 160 x 1698 1016074506240 0.0% 100.0% 0.0% flops 453 x 160 x 1711 1023853639680 0.0% 100.0% 0.0% flops 22 x 22 x 192 1032734512128 0.0% 100.0% 0.0% flops 449 x 160 x 160 1044611072000 0.0% 100.0% 0.0% flops 64 x 160 x 1698 1154253127680 0.0% 100.0% 0.0% flops 64 x 160 x 1711 1163090165760 0.0% 100.0% 0.0% flops 22 x 9 x 160 1241407664640 0.0% 100.0% 0.0% flops 9 x 22 x 160 1245002457600 0.0% 100.0% 0.0% flops 449 x 160 x 1698 1258878182400 0.0% 100.0% 0.0% flops 449 x 160 x 1711 1268516236800 0.0% 100.0% 0.0% flops 22 x 22 x 160 1721224186880 0.0% 100.0% 0.0% flops 9 x 32 x 9 6003313201152 0.0% 100.0% 0.0% flops 22 x 32 x 9 8382789550080 0.0% 100.0% 0.0% flops 9 x 32 x 22 8382789550080 0.0% 100.0% 0.0% flops 22 x 32 x 22 11531114856448 0.0% 100.0% 0.0% flops inhomo. stacks 24540172943360 100.0% 0.0% 0.0% flops total 98.898721E+12 24.8% 75.2% 0.0% flops max/rank 9.799051E+12 26.8% 73.2% 0.0% matmuls inhomo. stacks 325374 100.0% 0.0% 0.0% matmuls total 2982630470 0.0% 100.0% 0.0% number of processed stacks 5600350 5.8% 94.2% 0.0% average stack size 1.0 565.4 0.0 marketing flops 145.651870E+12 ------------------------------------------------------------------------------- # multiplications 2529 max memory usage/rank 1.539183E+09 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 242784 MPI messages size (bytes): total size 1.842673E+12 min size 0.000000E+00 max size 70.506240E+06 average size 7.589764E+06 MPI breakdown and total messages size (bytes): size <= 128 1386 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 66 2883584 131072 < size <= 4194304 145092 136751087616 4194304 < size <= 16777216 70800 494927872000 16777216 < size 25440 1210987230800 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 4043 58559. MP_Allreduce 11184 1416. MP_Sync 88 MP_Alltoall 1724 25011413. MP_SendRecv 2838 150016. MP_ISendRecv 2838 150016. MP_Wait 13060 MP_ISend 8608 623442. MP_IRecv 8608 623442. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.132 0.178 414.723 414.728 qs_mol_dyn_low 1 2.0 0.041 0.064 413.740 413.886 qs_forces 11 3.9 0.017 0.038 413.318 413.374 qs_energies 11 4.9 0.012 0.025 401.285 401.347 scf_env_do_scf 11 5.9 0.002 0.005 371.340 371.374 scf_env_do_scf_inner_loop 118 6.6 0.005 0.023 289.573 289.582 velocity_verlet 10 3.0 0.019 0.026 274.748 274.790 dbcsr_multiply_generic 2529 12.6 0.298 0.343 214.591 217.754 qs_scf_new_mos 118 7.6 0.001 0.001 190.540 191.650 qs_scf_loop_do_ot 118 8.6 0.001 0.002 190.538 191.649 ot_scf_mini 118 9.6 0.005 0.006 180.538 181.392 multiply_cannon 2529 13.6 0.390 0.433 153.827 160.961 multiply_cannon_loop 2529 14.6 0.377 0.414 143.474 150.246 ot_mini 118 10.6 0.002 0.003 110.792 111.885 multiply_cannon_multrec 15174 15.6 85.392 110.650 85.445 110.698 mp_waitall_1 125844 16.7 63.158 100.836 63.158 100.836 init_scf_loop 11 6.9 0.003 0.007 81.492 81.504 multiply_cannon_metrocomm3 15174 15.6 0.068 0.124 42.010 81.083 rebuild_ks_matrix 129 8.3 0.001 0.001 74.053 75.883 qs_ks_build_kohn_sham_matrix 129 9.3 0.021 0.025 74.052 75.883 apply_preconditioner_dbcsr 129 12.6 0.001 0.001 68.612 71.695 apply_single 129 13.6 0.001 0.001 68.612 71.695 prepare_preconditioner 11 7.9 0.000 0.000 70.840 70.988 make_preconditioner 11 8.9 0.000 0.001 70.839 70.988 qs_ks_update_qs_env 129 7.6 0.001 0.001 67.884 69.482 ot_diis_step 118 11.6 0.033 0.036 68.331 68.334 make_full_inverse_cholesky 11 9.9 0.038 0.048 58.593 68.111 make_m2s 5058 13.6 0.151 0.185 45.476 47.008 qs_ot_get_derivative 118 11.6 0.002 0.003 42.159 43.079 qs_rho_update_rho_low 129 7.7 0.001 0.001 41.060 41.316 calculate_rho_elec 129 8.7 0.413 0.425 41.059 41.315 make_images 5058 14.6 4.527 5.144 35.170 36.840 multiply_cannon_metrocomm4 12645 15.6 0.072 0.119 13.784 36.653 cp_fm_upper_to_full 106 14.8 26.464 36.476 26.464 36.476 mp_irecv_dv 35463 16.2 13.667 36.443 13.667 36.443 sum_up_and_integrate 129 10.3 0.003 0.006 31.162 31.295 integrate_v_rspace 129 11.3 0.004 0.005 31.052 31.192 grid_collocate_task_list 129 9.7 28.708 29.303 28.708 29.303 qs_ot_get_p 129 10.4 0.001 0.002 27.557 29.263 hybrid_alltoall_any 5245 16.5 1.364 3.793 22.358 25.776 make_images_data 5058 15.6 0.084 0.137 23.141 25.319 dbcsr_complete_redistribute 397 12.7 3.241 3.446 18.132 24.930 grid_integrate_task_list 129 12.3 22.110 22.491 22.110 22.491 copy_fm_to_dbcsr 210 11.7 0.002 0.002 14.726 21.404 init_scf_run 11 5.9 0.001 0.003 21.260 21.266 scf_env_initial_rho_setup 11 6.9 0.002 0.003 21.259 21.266 cp_fm_cholesky_invert 11 10.9 21.095 21.130 21.095 21.130 wfi_extrapolate 11 7.9 0.024 0.048 19.084 19.085 transfer_fm_to_dbcsr 11 9.9 0.038 0.042 12.205 18.713 qs_ot_get_derivative_diag 78 12.4 0.003 0.004 17.671 18.555 mp_alltoall_i22 720 14.1 10.841 17.686 10.841 17.686 qs_ot_p2m_diag 84 11.4 0.540 0.599 17.472 17.553 cp_dbcsr_syevd 84 12.4 0.007 0.007 15.987 15.992 fft_wrap_pw1pw2 1301 11.7 0.023 0.030 13.091 13.272 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.003 12.916 13.162 mp_sum_l 12473 13.3 8.877 12.857 8.877 12.857 cp_fm_diag_elpa 84 13.4 0.001 0.001 12.460 12.478 cp_fm_diag_elpa_base 84 14.4 10.371 10.895 12.443 12.443 calculate_dm_sparse 129 9.5 0.001 0.001 11.280 11.985 fft_wrap_pw1pw2_140 527 12.2 0.827 0.846 11.803 11.960 dbcsr_dot_sd 1331 12.0 2.927 3.145 6.555 11.099 dbcsr_make_dense_low 10920 15.7 0.116 0.168 10.629 11.053 make_dense_data 10920 16.7 10.095 10.513 10.464 10.894 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 10.029 10.342 fft3d_ps 1301 13.7 5.239 5.596 10.029 10.302 density_rs2pw 129 9.7 0.008 0.012 9.242 10.076 dbcsr_make_images_dense 4424 14.8 0.073 0.103 9.864 10.042 mp_alltoall_d11v 2429 14.1 8.764 9.179 8.764 9.179 qs_ot_get_orbitals 118 10.6 0.001 0.001 8.884 9.138 mp_sum_d 4511 12.1 4.379 9.039 4.379 9.039 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="412", plot="h2o_256_md", label="(4n/3r/12t)", y=414.728000, yerr=0.000000 PlotPoint: name="413", plot="h2o_256_md_mem", label="(4n/3r/12t)", y=1301.636364, yerr=30.556275 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/28/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 3321888768 0.0% 100.0% 0.0% flops 22 x 32 x 32 4060086272 0.0% 100.0% 0.0% flops 258 x 256 x 256 614650085376 0.0% 100.0% 0.0% flops 32 x 256 x 256 993211187200 0.0% 100.0% 0.0% flops 64 x 256 x 256 993211187200 0.0% 100.0% 0.0% flops 280 x 256 x 256 1000593162240 0.0% 100.0% 0.0% flops 258 x 256 x 2560 1395948257280 0.0% 100.0% 0.0% flops 9 x 9 x 256 1430451062784 0.0% 100.0% 0.0% flops 22 x 9 x 256 1986258751488 0.0% 100.0% 0.0% flops 9 x 22 x 256 1991997444096 0.0% 100.0% 0.0% flops 280 x 256 x 2560 2272473907200 0.0% 100.0% 0.0% flops 289 x 256 x 256 2409761865728 0.0% 100.0% 0.0% flops 22 x 22 x 256 2753958699008 0.0% 100.0% 0.0% flops 311 x 256 x 256 3334119358464 0.0% 100.0% 0.0% flops 32 x 256 x 2560 4949949808640 0.0% 100.0% 0.0% flops 64 x 256 x 2560 4949949808640 0.0% 100.0% 0.0% flops 289 x 256 x 2560 5472874659840 0.0% 100.0% 0.0% flops 9 x 32 x 9 6003285995520 0.0% 100.0% 0.0% flops 311 x 256 x 2560 7572207697920 0.0% 100.0% 0.0% flops 22 x 32 x 9 8382791983104 0.0% 100.0% 0.0% flops 9 x 32 x 22 8382791983104 0.0% 100.0% 0.0% flops 22 x 32 x 22 11531114856448 0.0% 100.0% 0.0% flops inhomo. stacks 19217579507712 100.0% 0.0% 0.0% flops total 97.646563E+12 19.7% 80.3% 0.0% flops max/rank 12.577044E+12 20.6% 79.4% 0.0% matmuls inhomo. stacks 167280 100.0% 0.0% 0.0% matmuls total 2939394456 0.0% 100.0% 0.0% number of processed stacks 4491964 3.7% 96.3% 0.0% average stack size 1.0 679.6 0.0 marketing flops 145.674553E+12 ------------------------------------------------------------------------------- # multiplications 2540 max memory usage/rank 4.186665E+09 # max total images/rank 2 # max 3D layers 1 # MPI messages exchanged 101600 MPI messages size (bytes): total size 1.145338E+12 min size 0.000000E+00 max size 104.857600E+06 average size 11.273011E+06 MPI breakdown and total messages size (bytes): size <= 128 572 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 44 2883584 131072 < size <= 4194304 46088 35802578944 4194304 < size <= 16777216 44720 382939955200 16777216 < size 10176 726592076688 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 4035 58670. MP_Allreduce 11164 1500. MP_Sync 88 MP_Alltoall 1724 36993678. MP_SendRecv 1806 218624. MP_ISendRecv 1806 218624. MP_Wait 9876 MP_ISend 6456 1080169. MP_IRecv 6456 1080169. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.159 0.184 462.641 462.668 qs_mol_dyn_low 1 2.0 0.037 0.055 461.763 461.789 qs_forces 11 3.9 0.011 0.013 461.483 461.506 qs_energies 11 4.9 0.040 0.041 448.959 449.027 scf_env_do_scf 11 5.9 0.002 0.003 417.028 417.071 velocity_verlet 10 3.0 0.002 0.003 316.408 316.433 scf_env_do_scf_inner_loop 118 6.6 0.007 0.022 285.196 285.206 dbcsr_multiply_generic 2540 12.6 0.308 0.315 196.621 197.401 qs_scf_new_mos 118 7.6 0.001 0.001 182.216 182.754 qs_scf_loop_do_ot 118 8.6 0.001 0.002 182.215 182.753 ot_scf_mini 118 9.6 0.005 0.006 171.654 172.167 multiply_cannon 2540 13.6 0.394 0.418 134.310 135.875 init_scf_loop 11 6.9 0.002 0.003 131.450 131.470 multiply_cannon_loop 2540 14.6 0.292 0.335 128.083 130.512 prepare_preconditioner 11 7.9 0.000 0.000 120.422 120.562 make_preconditioner 11 8.9 0.001 0.001 120.422 120.562 make_full_inverse_cholesky 11 9.9 0.046 0.053 96.289 116.550 ot_mini 118 10.6 0.002 0.002 98.887 99.371 multiply_cannon_multrec 10160 15.6 83.149 99.279 83.207 99.334 mp_waitall_1 105020 16.8 57.827 82.090 57.827 82.090 rebuild_ks_matrix 129 8.3 0.001 0.001 73.979 75.101 qs_ks_build_kohn_sham_matrix 129 9.3 0.019 0.020 73.979 75.100 cp_fm_upper_to_full 106 14.8 51.218 73.325 51.218 73.325 qs_ks_update_qs_env 129 7.6 0.001 0.001 68.087 69.075 apply_preconditioner_dbcsr 129 12.6 0.001 0.001 60.876 61.492 apply_single 129 13.6 0.001 0.001 60.875 61.492 ot_diis_step 118 11.6 0.038 0.039 59.818 59.821 multiply_cannon_metrocomm3 10160 15.6 0.038 0.040 36.595 57.515 make_m2s 5080 13.6 0.134 0.138 48.903 49.999 qs_rho_update_rho_low 129 7.7 0.001 0.001 45.636 45.871 calculate_rho_elec 129 8.7 0.579 0.581 45.635 45.870 dbcsr_complete_redistribute 397 12.7 4.529 4.560 31.901 44.202 copy_fm_to_dbcsr 210 11.7 0.002 0.002 27.220 39.530 qs_ot_get_derivative 118 11.6 0.002 0.003 38.974 39.511 make_images 5080 14.6 6.071 6.280 35.789 36.946 transfer_fm_to_dbcsr 11 9.9 0.042 0.047 24.084 36.208 mp_alltoall_i22 720 14.1 21.724 34.093 21.724 34.093 qs_ot_get_p 129 10.4 0.001 0.001 33.031 33.770 sum_up_and_integrate 129 10.3 0.002 0.002 32.785 32.906 integrate_v_rspace 129 11.3 0.003 0.004 32.673 32.794 grid_collocate_task_list 129 9.7 31.728 32.104 31.728 32.104 cp_fm_cholesky_invert 11 10.9 30.257 30.269 30.257 30.269 make_images_data 5080 15.6 0.075 0.085 23.524 27.024 hybrid_alltoall_any 5267 16.5 2.041 4.546 23.717 26.859 qs_ot_p2m_diag 84 11.4 0.716 0.723 23.923 23.929 grid_integrate_task_list 129 12.3 22.526 22.798 22.526 22.798 cp_dbcsr_syevd 84 12.4 0.006 0.007 22.312 22.316 init_scf_run 11 5.9 0.001 0.003 21.239 21.244 scf_env_initial_rho_setup 11 6.9 0.001 0.003 21.238 21.243 wfi_extrapolate 11 7.9 0.001 0.001 19.158 19.160 cp_fm_diag_elpa 84 13.4 0.001 0.001 18.731 18.735 cp_fm_diag_elpa_base 84 14.4 14.335 15.782 18.726 18.727 qs_ot_get_derivative_diag 78 12.4 0.003 0.003 15.919 16.437 fft_wrap_pw1pw2 1301 11.7 0.021 0.025 14.473 14.511 multiply_cannon_metrocomm4 7620 15.6 0.038 0.041 6.826 14.185 mp_irecv_dv 24079 16.2 6.763 14.083 6.763 14.083 dbcsr_make_dense_low 8752 15.6 0.097 0.098 13.766 13.878 make_dense_data 8752 16.6 13.277 13.360 13.632 13.743 fft_wrap_pw1pw2_140 527 12.2 1.170 1.181 13.016 13.061 dbcsr_make_images_dense 4446 14.8 0.056 0.057 12.623 12.794 calculate_dm_sparse 129 9.5 0.001 0.002 11.924 12.535 cp_fm_cholesky_decompose 22 10.9 12.420 12.439 12.420 12.439 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 12.303 12.370 mp_alltoall_d11v 2429 14.1 10.915 11.317 10.915 11.317 density_rs2pw 129 9.7 0.007 0.008 10.037 10.922 fft3d_ps 1301 13.7 5.757 5.818 10.835 10.871 copy_dbcsr_to_fm 187 11.8 0.004 0.004 9.278 9.575 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 8.920 9.327 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="414", plot="h2o_256_md", label="(4n/2r/18t)", y=462.668000, yerr=0.000000 PlotPoint: name="415", plot="h2o_256_md_mem", label="(4n/2r/18t)", y=3162.181818, yerr=499.219175 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/29/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 9 x 32 x 32 3321888768 0.0% 100.0% 0.0% flops 22 x 32 x 32 4060086272 0.0% 100.0% 0.0% flops 138 x 512 x 512 986298974208 0.0% 100.0% 0.0% flops 160 x 512 x 512 1334124216320 0.0% 100.0% 0.0% flops 9 x 9 x 512 1430456039424 0.0% 100.0% 0.0% flops 32 x 512 x 512 1962800054272 0.0% 100.0% 0.0% flops 22 x 9 x 512 1986255912960 0.0% 100.0% 0.0% flops 9 x 22 x 512 1992003932160 0.0% 100.0% 0.0% flops 138 x 512 x 5120 2240009994240 0.0% 100.0% 0.0% flops 129 x 512 x 512 2612262862848 0.0% 100.0% 0.0% flops 22 x 22 x 512 2753958699008 0.0% 100.0% 0.0% flops 160 x 512 x 5120 3029965209600 0.0% 100.0% 0.0% flops 151 x 512 x 512 5036318916608 0.0% 100.0% 0.0% flops 129 x 512 x 5120 5932780093440 0.0% 100.0% 0.0% flops 9 x 32 x 9 6003307892736 0.0% 100.0% 0.0% flops 22 x 32 x 9 8382797660160 0.0% 100.0% 0.0% flops 9 x 32 x 22 8382797660160 0.0% 100.0% 0.0% flops 32 x 512 x 5120 9899899617280 0.0% 100.0% 0.0% flops 151 x 512 x 5120 11438118666240 0.0% 100.0% 0.0% flops 22 x 32 x 22 11531114856448 0.0% 100.0% 0.0% flops inhomo. stacks 7886533033984 100.0% 0.0% 0.0% flops total 94.829186E+12 8.3% 91.7% 0.0% flops max/rank 25.187935E+12 10.7% 89.3% 0.0% matmuls inhomo. stacks 33456 100.0% 0.0% 0.0% matmuls total 2896413202 0.0% 100.0% 0.0% number of processed stacks 3532544 0.9% 99.1% 0.0% average stack size 1.0 827.8 0.0 marketing flops 145.650931E+12 ------------------------------------------------------------------------------- # multiplications 2529 max memory usage/rank 15.152042E+09 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 20232 MPI messages size (bytes): total size 447.268389E+09 min size 0.000000E+00 max size 209.715200E+06 average size 22.106978E+06 MPI breakdown and total messages size (bytes): size <= 128 110 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 22 2883584 131072 < size <= 4194304 9584 20099104768 4194304 < size <= 16777216 0 0 16777216 < size 10516 427166295184 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 21 12. MP_Allreduce 13481 37. MP_Alltoall 9724 3272170. MP_ISend 20188 5665856. MP_IRecv 20188 5660508. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 4032 58717. MP_Allreduce 11162 1841. MP_Sync 88 MP_Alltoall 1702 67413288. MP_SendRecv 1161 1443499. MP_ISendRecv 1161 1443499. MP_Wait 3401 MP_ISend 1680 4586667. MP_IRecv 1680 4586667. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.066 0.082 416.155 416.162 qs_mol_dyn_low 1 2.0 0.094 0.098 415.145 415.150 qs_forces 11 3.9 0.030 0.043 414.774 414.785 qs_energies 11 4.9 0.007 0.008 396.305 396.338 scf_env_do_scf 11 5.9 0.016 0.019 354.154 354.177 velocity_verlet 10 3.0 0.002 0.002 282.219 282.240 scf_env_do_scf_inner_loop 118 6.6 0.009 0.027 267.064 267.115 qs_scf_new_mos 118 7.6 0.001 0.001 158.193 158.426 qs_scf_loop_do_ot 118 8.6 0.001 0.001 158.192 158.425 dbcsr_multiply_generic 2529 12.6 0.376 0.391 150.742 152.833 ot_scf_mini 118 9.6 0.005 0.005 146.477 146.776 multiply_cannon 2529 13.6 0.950 1.034 87.809 92.439 init_scf_loop 11 6.9 0.014 0.015 86.388 86.409 multiply_cannon_loop 2529 14.6 0.170 0.199 78.724 82.356 ot_mini 118 10.6 0.002 0.002 81.115 81.437 multiply_cannon_multrec 5058 15.6 73.342 75.870 73.630 76.170 prepare_preconditioner 11 7.9 0.000 0.000 74.718 74.842 make_preconditioner 11 8.9 0.001 0.002 74.718 74.842 rebuild_ks_matrix 129 8.3 0.001 0.001 68.642 69.179 qs_ks_build_kohn_sham_matrix 129 9.3 0.018 0.019 68.641 69.178 make_full_inverse_cholesky 11 9.9 0.065 0.076 67.070 67.981 qs_ks_update_qs_env 129 7.6 0.001 0.001 63.712 64.100 qs_rho_update_rho_low 129 7.7 0.001 0.001 57.626 57.741 calculate_rho_elec 129 8.7 1.032 1.033 57.625 57.740 make_m2s 5058 13.6 0.133 0.136 49.361 54.171 apply_preconditioner_dbcsr 129 12.6 0.001 0.001 41.572 42.629 apply_single 129 13.6 0.001 0.001 41.571 42.629 ot_diis_step 118 11.6 0.047 0.049 42.303 42.304 grid_collocate_task_list 129 9.7 39.846 40.199 39.846 40.199 qs_ot_get_derivative 118 11.6 0.003 0.004 38.694 38.982 qs_ot_get_p 129 10.4 0.001 0.001 38.261 38.387 make_images 5058 14.6 7.191 7.534 32.179 36.843 cp_fm_cholesky_invert 11 10.9 36.342 36.349 36.342 36.349 sum_up_and_integrate 129 10.3 0.002 0.002 35.408 35.440 integrate_v_rspace 129 11.3 0.004 0.004 35.293 35.325 mp_waitall_1 82649 16.9 20.685 31.625 20.685 31.625 qs_ot_p2m_diag 84 11.4 1.250 1.256 31.338 31.346 cp_dbcsr_syevd 84 12.4 0.007 0.008 28.981 28.981 hybrid_alltoall_any 5245 16.5 3.628 8.286 20.765 27.671 make_images_data 5058 15.6 0.074 0.086 19.662 26.532 cp_fm_diag_elpa 84 13.4 0.000 0.000 25.188 25.189 cp_fm_diag_elpa_base 84 14.4 23.953 24.169 25.182 25.182 grid_integrate_task_list 129 12.3 23.569 23.665 23.569 23.665 init_scf_run 11 5.9 0.001 0.003 23.619 23.620 scf_env_initial_rho_setup 11 6.9 0.003 0.005 23.618 23.620 dbcsr_complete_redistribute 397 12.7 8.745 8.972 21.430 22.110 wfi_extrapolate 11 7.9 0.001 0.001 21.013 21.014 fft_wrap_pw1pw2 1301 11.7 0.021 0.021 19.533 19.572 dbcsr_make_dense_low 6496 15.6 0.087 0.088 18.929 19.119 make_dense_data 6496 16.6 18.179 18.594 18.811 19.000 fft_wrap_pw1pw2_140 527 12.2 2.157 2.166 17.451 17.538 dbcsr_make_images_dense 4424 14.8 0.043 0.044 16.413 16.674 copy_dbcsr_to_fm 187 11.8 0.004 0.004 15.077 15.627 qs_ot_get_derivative_diag 78 12.4 0.003 0.003 15.186 15.472 cp_fm_cholesky_decompose 22 10.9 14.782 15.150 14.782 15.150 fft3d_ps 1301 13.7 8.097 8.170 14.323 14.386 mp_allgather_i34 2529 14.6 5.784 13.663 5.784 13.663 mp_alltoall_d11v 2429 14.1 13.511 13.601 13.511 13.601 calculate_dm_sparse 129 9.5 0.001 0.001 13.429 13.533 qs_energies_init_hamiltonians 11 5.9 0.001 0.001 13.466 13.469 copy_fm_to_dbcsr 210 11.7 0.002 0.002 12.804 13.462 density_rs2pw 129 9.7 0.006 0.006 12.963 13.273 cp_dbcsr_sm_fm_multiply 37 9.5 0.002 0.002 12.664 12.733 transfer_dbcsr_to_fm 11 10.9 0.000 0.000 10.952 11.190 dbcsr_dot_sd 1331 12.0 7.841 7.894 8.858 9.602 build_core_hamiltonian_matrix_ 11 4.9 0.001 0.001 8.739 9.012 qs_ks_update_qs_env_forces 11 4.9 0.000 0.000 8.563 8.701 dbcsr_finalize 5433 13.8 0.134 0.146 8.155 8.343 cp_dbcsr_sm_fm_multiply_core 37 10.5 0.000 0.000 7.963 8.329 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="416", plot="h2o_256_md", label="(4n/1r/36t)", y=416.162000, yerr=0.000000 PlotPoint: name="417", plot="h2o_256_md_mem", label="(4n/1r/36t)", y=11673.545455, yerr=2306.322392 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/30/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 23 x 23 x 23 234439235724792 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 234.439236E+12 0.0% 100.0% 0.0% flops max/rank 1.924147E+12 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 9634225188 0.0% 100.0% 0.0% number of processed stacks 9697789 0.0% 100.0% 0.0% average stack size 0.0 993.4 0.0 marketing flops 1.742118E+15 ------------------------------------------------------------------------------- # multiplications 111 max memory usage/rank 684.711936E+06 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 351648 MPI messages size (bytes): total size 4.213128E+12 min size 0.000000E+00 max size 25.408928E+06 average size 11.981093E+06 MPI breakdown and total messages size (bytes): size <= 128 92928 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 0 0 131072 < size <= 4194304 14784 17360358048 4194304 < size <= 16777216 77033 1000974278216 16777216 < size 166903 3194793140616 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 2 12. MP_Allreduce 716 49. MP_Alltoall 310 1558616. MP_ISend 5328 5698210. MP_IRecv 5328 5713852. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 1026 253283. MP_Allreduce 3140 5695. MP_Sync 4 MP_Alltoall 61 4925786. MP_SendRecv 429 12000. MP_ISendRecv 429 12000. MP_Wait 1251 MP_ISend 726 139430. MP_IRecv 726 139314. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.018 0.049 205.682 205.687 qs_energies 1 2.0 0.000 0.001 205.026 205.039 ls_scf 1 3.0 0.000 0.000 204.392 204.406 dbcsr_multiply_generic 111 6.7 0.015 0.046 188.602 190.891 multiply_cannon 111 7.7 0.018 0.023 156.324 173.727 multiply_cannon_loop 111 8.7 0.056 0.070 152.909 170.024 multiply_cannon_multrec 1332 9.7 122.589 145.924 122.712 146.040 ls_scf_main 1 4.0 0.000 0.000 119.786 119.789 density_matrix_trs4 2 5.0 0.002 0.003 112.732 113.549 ls_scf_init_scf 1 4.0 0.000 0.000 76.345 76.347 ls_scf_init_matrix_S 1 5.0 0.000 0.000 74.297 74.823 matrix_sqrt_Newton_Schulz 2 6.5 0.001 0.002 69.335 69.364 mp_waitall_1 12957 10.9 30.820 45.301 30.820 45.301 mp_sum_l 898 5.1 18.267 30.873 18.267 30.873 multiply_cannon_metrocomm1 1332 9.7 0.011 0.017 16.730 27.651 dbcsr_multiply_generic_mpsum_f 86 7.8 0.000 0.000 13.772 24.530 multiply_cannon_metrocomm3 1332 9.7 0.007 0.014 4.181 22.213 make_m2s 222 7.7 0.007 0.008 13.200 13.640 make_images 222 8.7 0.063 0.071 13.184 13.625 mp_irecv_dv 3257 11.0 4.319 12.008 4.319 12.008 make_images_data 222 9.7 0.004 0.006 10.088 11.113 hybrid_alltoall_any 227 10.6 0.164 2.441 7.884 10.036 ls_scf_post 1 4.0 0.000 0.000 8.260 8.275 ls_scf_store_result 1 5.0 0.000 0.000 7.747 8.114 multiply_cannon_metrocomm4 1221 9.7 0.007 0.015 2.643 7.266 make_images_sizes 222 9.7 0.000 0.001 0.782 7.019 mp_alltoall_i44 222 10.7 0.781 7.019 0.781 7.019 multiply_cannon_metrocomm2 1221 9.7 0.008 0.014 1.729 6.916 calculate_norms 2376 9.8 4.858 6.006 4.858 6.006 apply_matrix_preconditioner 6 5.3 0.000 0.000 4.899 5.602 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(4n/36r/1t)", y=205.687000, yerr=0.000000 PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(4n/36r/1t)", y=633.000000, yerr=0.000000 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/31/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 23 x 23 x 23 234439235724792 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 234.439236E+12 0.0% 100.0% 0.0% flops max/rank 3.668084E+12 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 9634225188 0.0% 100.0% 0.0% number of processed stacks 9699679 0.0% 100.0% 0.0% average stack size 0.0 993.3 0.0 marketing flops 1.742118E+15 ------------------------------------------------------------------------------- # multiplications 111 max memory usage/rank 1.130062E+09 # max total images/rank 2 # max 3D layers 1 # MPI messages exchanged 167832 MPI messages size (bytes): total size 3.077601E+12 min size 0.000000E+00 max size 46.966736E+06 average size 18.337394E+06 MPI breakdown and total messages size (bytes): size <= 128 40320 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 320 37241600 131072 < size <= 4194304 9250 5469089776 4194304 < size <= 16777216 22076 261636692280 16777216 < size 95866 2810458169672 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 1026 259847. MP_Allreduce 3139 7842. MP_Sync 4 MP_Alltoall 54 22313188. MP_SendRecv 213 26880. MP_ISendRecv 213 26880. MP_Wait 945 MP_ISend 642 238274. MP_IRecv 642 237962. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.057 0.087 341.571 341.574 qs_energies 1 2.0 0.000 0.001 340.924 340.939 ls_scf 1 3.0 0.002 0.010 339.779 339.795 dbcsr_multiply_generic 111 6.7 0.019 0.025 315.263 317.862 multiply_cannon 111 7.7 0.026 0.030 269.571 294.289 multiply_cannon_loop 111 8.7 0.087 0.096 263.824 289.902 multiply_cannon_multrec 1332 9.7 235.403 266.738 235.560 266.881 ls_scf_main 1 4.0 0.001 0.002 196.765 196.767 density_matrix_trs4 2 5.0 0.003 0.008 185.826 186.854 ls_scf_init_scf 1 4.0 0.000 0.001 129.299 129.302 ls_scf_init_matrix_S 1 5.0 0.000 0.000 126.068 126.844 matrix_sqrt_Newton_Schulz 2 6.5 0.001 0.005 117.435 117.456 mp_waitall_1 10071 10.9 25.428 52.953 25.428 52.953 mp_sum_l 898 5.1 24.363 42.738 24.363 42.738 multiply_cannon_metrocomm3 1332 9.7 0.009 0.012 8.023 38.252 dbcsr_multiply_generic_mpsum_f 86 7.8 0.000 0.000 19.740 35.115 multiply_cannon_metrocomm1 1332 9.7 0.006 0.007 5.993 22.937 make_m2s 222 7.7 0.009 0.012 19.754 20.522 make_images 222 8.7 1.612 2.053 19.722 20.494 mp_irecv_dv 3391 11.0 6.360 17.867 6.360 17.867 multiply_cannon_metrocomm4 1221 9.7 0.010 0.012 5.277 13.963 ls_scf_post 1 4.0 0.000 0.001 13.713 13.730 ls_scf_store_result 1 5.0 0.000 0.000 12.960 13.520 make_images_data 222 9.7 0.006 0.006 11.922 13.453 hybrid_alltoall_any 227 10.6 0.415 2.871 10.235 12.482 calculate_norms 2376 9.8 7.541 8.532 7.541 8.532 apply_matrix_preconditioner 6 5.3 0.000 0.000 6.964 8.121 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(4n/18r/2t)", y=341.574000, yerr=0.000000 PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(4n/18r/2t)", y=1023.000000, yerr=0.000000 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/32/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 23 x 23 x 23 234439235724792 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 234.439236E+12 0.0% 100.0% 0.0% flops max/rank 7.107463E+12 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 9634225188 0.0% 100.0% 0.0% number of processed stacks 9667097 0.0% 100.0% 0.0% average stack size 0.0 996.6 0.0 marketing flops 1.742118E+15 ------------------------------------------------------------------------------- # multiplications 111 max memory usage/rank 2.030060E+09 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 39960 MPI messages size (bytes): total size 1.915058E+12 min size 0.000000E+00 max size 93.908080E+06 average size 47.924364E+06 MPI breakdown and total messages size (bytes): size <= 128 9600 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 0 0 131072 < size <= 4194304 1920 1170063360 4194304 < size <= 16777216 720 6721008480 16777216 < size 27720 1907167008560 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 2 12. MP_Allreduce 716 87. MP_Alltoall 310 5824960. MP_ISend 2664 20322846. MP_IRecv 2664 20175729. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 1026 259409. MP_Allreduce 3138 10459. MP_Sync 4 MP_Alltoall 47 20667983. MP_SendRecv 105 57600. MP_ISendRecv 105 57600. MP_Wait 567 MP_ISend 378 618054. MP_IRecv 378 618834. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.031 0.054 350.169 350.171 qs_energies 1 2.0 0.000 0.000 349.567 349.580 ls_scf 1 3.0 0.000 0.000 348.244 348.258 dbcsr_multiply_generic 111 6.7 0.019 0.020 320.988 324.871 multiply_cannon 111 7.7 0.026 0.030 266.828 294.930 multiply_cannon_loop 111 8.7 0.065 0.075 259.080 288.212 multiply_cannon_multrec 666 9.7 233.070 267.902 233.207 268.013 ls_scf_main 1 4.0 0.000 0.001 200.482 200.501 density_matrix_trs4 2 5.0 0.003 0.005 188.330 189.757 ls_scf_init_scf 1 4.0 0.000 0.000 133.406 133.408 ls_scf_init_matrix_S 1 5.0 0.000 0.000 129.729 130.767 matrix_sqrt_Newton_Schulz 2 6.5 0.001 0.004 120.982 120.997 mp_waitall_1 7293 11.0 28.589 57.315 28.589 57.315 mp_sum_l 898 5.1 26.979 46.031 26.979 46.031 dbcsr_multiply_generic_mpsum_f 86 7.8 0.000 0.000 21.570 38.158 multiply_cannon_metrocomm1 666 9.7 0.004 0.005 10.691 32.898 make_m2s 222 7.7 0.007 0.007 24.042 25.694 make_images 222 8.7 3.131 3.891 24.000 25.654 make_images_data 222 9.7 0.004 0.005 15.935 18.741 hybrid_alltoall_any 227 10.6 0.661 4.834 15.222 17.701 mp_irecv_dv 1601 11.0 6.479 16.941 6.479 16.941 multiply_cannon_metrocomm3 666 9.7 0.003 0.004 2.613 16.555 ls_scf_post 1 4.0 0.000 0.000 14.356 14.372 multiply_cannon_metrocomm4 555 9.7 0.003 0.004 4.068 14.367 ls_scf_store_result 1 5.0 0.000 0.000 13.403 14.075 multiply_cannon_metrocomm2 555 9.7 0.004 0.005 3.266 12.104 apply_matrix_preconditioner 6 5.3 0.000 0.000 7.627 8.794 make_images_sizes 222 9.7 0.000 0.000 1.781 8.005 mp_alltoall_i44 222 10.7 1.780 8.005 1.780 8.005 mp_allgather_i34 111 8.7 2.063 7.138 2.063 7.138 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(4n/9r/4t)", y=350.171000, yerr=0.000000 PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(4n/9r/4t)", y=1351.000000, yerr=0.000000 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/33/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 23 x 23 x 23 234439235724792 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 234.439236E+12 0.0% 100.0% 0.0% flops max/rank 10.747127E+12 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 9634225188 0.0% 100.0% 0.0% number of processed stacks 9703792 0.0% 100.0% 0.0% average stack size 0.0 992.8 0.0 marketing flops 1.742116E+15 ------------------------------------------------------------------------------- # multiplications 111 max memory usage/rank 2.919064E+09 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 50616 MPI messages size (bytes): total size 1.536549E+12 min size 0.000000E+00 max size 72.286792E+06 average size 30.356988E+06 MPI breakdown and total messages size (bytes): size <= 128 10368 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 1056 104411904 131072 < size <= 4194304 3168 831638784 4194304 < size <= 16777216 3103 33613273640 16777216 < size 32921 1501999894888 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 1026 266673. MP_Allreduce 3138 13030. MP_Sync 4 MP_Alltoall 47 30278988. MP_SendRecv 69 86400. MP_ISendRecv 69 86400. MP_Wait 531 MP_ISend 378 823502. MP_IRecv 378 823753. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.050 0.067 344.159 344.167 qs_energies 1 2.0 0.007 0.030 343.305 343.369 ls_scf 1 3.0 0.000 0.001 341.713 341.737 dbcsr_multiply_generic 111 6.7 0.021 0.023 315.342 317.631 multiply_cannon 111 7.7 0.028 0.031 259.837 277.727 multiply_cannon_loop 111 8.7 0.096 0.108 251.941 270.131 multiply_cannon_multrec 1332 9.7 237.124 257.876 237.238 257.989 ls_scf_main 1 4.0 0.003 0.026 198.382 198.391 density_matrix_trs4 2 5.0 0.004 0.021 186.697 187.528 ls_scf_init_scf 1 4.0 0.000 0.002 129.231 129.234 ls_scf_init_matrix_S 1 5.0 0.000 0.001 126.134 126.687 matrix_sqrt_Newton_Schulz 2 6.5 0.001 0.003 117.263 117.283 mp_sum_l 898 5.1 18.987 38.085 18.987 38.085 make_m2s 222 7.7 0.008 0.010 32.614 33.343 make_images 222 8.7 3.676 4.093 32.554 33.286 dbcsr_multiply_generic_mpsum_f 86 7.8 0.000 0.000 15.400 31.176 mp_waitall_1 6369 11.0 23.398 29.017 23.398 29.017 make_images_data 222 9.7 0.005 0.009 19.022 21.125 hybrid_alltoall_any 227 10.6 0.876 3.960 17.757 20.714 multiply_cannon_metrocomm4 1110 9.7 0.006 0.011 4.417 15.066 mp_irecv_dv 3229 10.9 4.412 15.041 4.412 15.041 ls_scf_post 1 4.0 0.007 0.027 14.099 14.126 ls_scf_store_result 1 5.0 0.000 0.000 13.386 13.846 apply_matrix_preconditioner 6 5.3 0.000 0.000 7.169 8.440 multiply_cannon_metrocomm1 1332 9.7 0.004 0.006 3.850 8.176 dbcsr_data_release 10900 10.7 7.192 7.829 7.192 7.829 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(4n/6r/6t)", y=344.167000, yerr=0.000000 PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(4n/6r/6t)", y=1999.000000, yerr=0.000000 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/34/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 23 x 23 x 23 234439235724792 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 234.439236E+12 0.0% 100.0% 0.0% flops max/rank 15.383312E+12 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 9634225188 0.0% 100.0% 0.0% number of processed stacks 9657067 0.0% 100.0% 0.0% average stack size 0.0 997.6 0.0 marketing flops 1.742118E+15 ------------------------------------------------------------------------------- # multiplications 111 max memory usage/rank 4.235100E+09 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 10656 MPI messages size (bytes): total size 1.149035E+12 min size 0.000000E+00 max size 203.538048E+06 average size 107.829832E+06 MPI breakdown and total messages size (bytes): size <= 128 2304 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 0 0 131072 < size <= 4194304 768 702038016 4194304 < size <= 16777216 0 0 16777216 < size 7584 1148332810224 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 2 12. MP_Allreduce 716 126. MP_Alltoall 310 12920694. MP_ISend 1776 40180426. MP_IRecv 1776 40465032. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 1026 265536. MP_Allreduce 3129 15263. MP_Sync 4 MP_Alltoall 47 46208988. MP_SendRecv 45 115200. MP_ISendRecv 45 115200. MP_Wait 528 MP_ISend 420 924980. MP_IRecv 420 924528. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.053 0.073 353.473 353.955 qs_energies 1 2.0 0.000 0.000 352.466 352.951 ls_scf 1 3.0 0.000 0.000 350.530 351.013 dbcsr_multiply_generic 111 6.7 0.022 0.025 320.525 322.104 multiply_cannon 111 7.7 0.030 0.032 267.704 281.405 multiply_cannon_loop 111 8.7 0.073 0.083 257.133 266.481 multiply_cannon_multrec 444 9.7 237.685 248.929 237.799 249.044 ls_scf_main 1 4.0 0.000 0.001 205.582 205.862 density_matrix_trs4 2 5.0 0.003 0.005 192.361 193.031 ls_scf_init_scf 1 4.0 0.000 0.000 130.906 131.085 ls_scf_init_matrix_S 1 5.0 0.000 0.000 127.839 128.339 matrix_sqrt_Newton_Schulz 2 6.5 0.001 0.003 118.739 118.900 mp_waitall_1 5436 11.0 33.998 39.520 33.998 39.520 make_m2s 222 7.7 0.006 0.007 34.566 37.997 make_images 222 8.7 3.682 4.428 34.496 37.921 make_images_data 222 9.7 0.004 0.006 24.657 29.290 hybrid_alltoall_any 227 10.6 1.177 5.567 22.864 28.223 mp_sum_l 898 5.1 13.014 25.132 13.014 25.132 dbcsr_multiply_generic_mpsum_f 86 7.8 0.000 0.000 10.166 20.149 ls_scf_post 1 4.0 0.000 0.000 14.042 14.065 mp_allgather_i34 111 8.7 3.540 14.061 3.540 14.061 ls_scf_store_result 1 5.0 0.000 0.000 13.481 13.815 make_images_sizes 222 9.7 0.000 0.001 2.478 11.680 mp_alltoall_i44 222 10.7 2.478 11.679 2.478 11.679 multiply_cannon_metrocomm1 444 9.7 0.003 0.004 8.901 11.657 dbcsr_data_release 10924 10.7 8.590 9.510 8.590 9.510 apply_matrix_preconditioner 6 5.3 0.000 0.000 7.822 8.680 multiply_cannon_metrocomm4 333 9.7 0.002 0.003 3.024 7.933 mp_irecv_dv 1241 11.2 3.027 7.925 3.027 7.925 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(4n/4r/9t)", y=353.955000, yerr=0.000000 PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(4n/4r/9t)", y=2724.000000, yerr=0.000000 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/35/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 23 x 23 x 23 234439235724792 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 234.439236E+12 0.0% 100.0% 0.0% flops max/rank 20.557908E+12 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 9634225188 0.0% 100.0% 0.0% number of processed stacks 9673530 0.0% 100.0% 0.0% average stack size 0.0 995.9 0.0 marketing flops 1.742116E+15 ------------------------------------------------------------------------------- # multiplications 111 max memory usage/rank 5.652247E+09 # max total images/rank 3 # max 3D layers 1 # MPI messages exchanged 10656 MPI messages size (bytes): total size 1.158041E+12 min size 0.000000E+00 max size 265.321008E+06 average size 108.674984E+06 MPI breakdown and total messages size (bytes): size <= 128 1536 0 128 < size <= 8192 0 0 8192 < size <= 32768 0 0 32768 < size <= 131072 0 0 131072 < size <= 4194304 1536 702038016 4194304 < size <= 16777216 72 672100848 16777216 < size 7512 1156666219168 ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 1026 284877. MP_Allreduce 3129 18839. MP_Sync 4 MP_Alltoall 47 60354741. MP_SendRecv 33 144000. MP_ISendRecv 33 144000. MP_Wait 432 MP_ISend 336 1403879. MP_IRecv 336 1404443. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.106 0.133 359.897 359.934 qs_energies 1 2.0 0.000 0.003 358.520 358.550 ls_scf 1 3.0 0.003 0.013 356.175 356.207 dbcsr_multiply_generic 111 6.7 0.024 0.026 322.948 323.960 multiply_cannon 111 7.7 0.031 0.033 266.308 282.409 multiply_cannon_loop 111 8.7 0.088 0.093 256.067 271.115 multiply_cannon_multrec 666 9.7 224.529 230.546 224.613 230.633 ls_scf_main 1 4.0 0.003 0.012 214.196 214.218 density_matrix_trs4 2 5.0 0.003 0.005 199.005 199.509 ls_scf_init_scf 1 4.0 0.000 0.002 127.915 127.925 ls_scf_init_matrix_S 1 5.0 0.000 0.001 124.103 124.379 matrix_sqrt_Newton_Schulz 2 6.5 0.001 0.003 115.265 115.278 mp_waitall_1 5424 11.0 39.530 57.443 39.530 57.443 make_m2s 222 7.7 0.007 0.008 36.986 39.965 make_images 222 8.7 4.492 4.988 36.897 39.874 multiply_cannon_metrocomm3 666 9.7 0.004 0.005 16.457 35.687 multiply_cannon_metrocomm4 555 9.7 0.003 0.005 8.306 28.077 mp_irecv_dv 1779 11.1 8.268 27.892 8.268 27.892 hybrid_alltoall_any 227 10.6 1.613 3.506 23.756 27.818 make_images_data 222 9.7 0.005 0.007 23.853 27.147 mp_sum_l 898 5.1 16.204 26.444 16.204 26.444 dbcsr_multiply_generic_mpsum_f 86 7.8 0.000 0.000 13.223 23.098 ls_scf_post 1 4.0 0.001 0.003 14.061 14.072 ls_scf_store_result 1 5.0 0.000 0.000 13.558 13.785 dbcsr_data_release 12835 10.6 6.975 11.653 6.975 11.653 apply_matrix_preconditioner 6 5.3 0.000 0.000 9.313 9.777 ls_scf_dm_to_ks 2 5.0 0.000 0.000 7.433 7.483 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(4n/3r/12t)", y=359.934000, yerr=0.000000 PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(4n/3r/12t)", y=3626.000000, yerr=0.000000 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ~~~~~~~~~ RESULT ~~~~~~~~~ RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC40-gnu/cebb65e3a93fe490e8f7435957a27a406ba398e1_performance_tests/36/result.log @@@@@@@@@@ Run number: 1 @@@@@@@@@@ ------------------------------------------------------------------------------- - - - DBCSR STATISTICS - - - ------------------------------------------------------------------------------- COUNTER TOTAL BLAS SMM ACC flops 64 x 64 x 64 109521666048 0.0% 100.0% 0.0% flops 32 x 32 x 849 202529832960 0.0% 100.0% 0.0% flops 32 x 32 x 853 203484037120 0.0% 100.0% 0.0% flops 32 x 32 x 858 204676792320 0.0% 100.0% 0.0% flops 64 x 64 x 96 328564998144 0.0% 100.0% 0.0% flops 64 x 96 x 64 328564998144 0.0% 100.0% 0.0% flops 96 x 64 x 64 328564998144 0.0% 100.0% 0.0% flops 9 x 32 x 32 549621596160 0.0% 100.0% 0.0% flops 22 x 32 x 32 671759728640 0.0% 100.0% 0.0% flops 64 x 64 x 849 936533557248 0.0% 100.0% 0.0% flops 64 x 64 x 853 940945965056 0.0% 100.0% 0.0% flops 64 x 64 x 858 946461474816 0.0% 100.0% 0.0% flops 96 x 96 x 64 985694994432 0.0% 100.0% 0.0% flops 64 x 96 x 96 985694994432 0.0% 100.0% 0.0% flops 96 x 64 x 96 985694994432 0.0% 100.0% 0.0% flops 849 x 64 x 64 1285508038656 0.0% 100.0% 0.0% flops 853 x 64 x 64 1291564613632 0.0% 100.0% 0.0% flops 858 x 64 x 64 1299135332352 0.0% 100.0% 0.0% flops 9 x 9 x 64 1833777211392 0.0% 100.0% 0.0% flops 9 x 22 x 64 2466560397312 0.0% 100.0% 0.0% flops 22 x 9 x 64 2471027226624 0.0% 100.0% 0.0% flops 64 x 96 x 849 2809600671744 0.0% 100.0% 0.0% flops 96 x 64 x 849 2809600671744 0.0% 100.0% 0.0% flops 64 x 96 x 853 2822837895168 0.0% 100.0% 0.0% flops 96 x 64 x 853 2822837895168 0.0% 100.0% 0.0% flops 64 x 96 x 858 2839384424448 0.0% 100.0% 0.0% flops 96 x 64 x 858 2839384424448 0.0% 100.0% 0.0% flops 849 x 64 x 849 2928781688832 0.0% 100.0% 0.0% flops 849 x 64 x 853 2942580424704 0.0% 100.0% 0.0% flops 853 x 64 x 849 2942580424704 0.0% 100.0% 0.0% flops 853 x 64 x 853 2956444172288 0.0% 100.0% 0.0% flops 96 x 96 x 96 2957084983296 0.0% 100.0% 0.0% flops 849 x 64 x 858 2959828844544 0.0% 100.0% 0.0% flops 858 x 64 x 849 2959828844544 0.0% 100.0% 0.0% flops 853 x 64 x 858 2973773856768 0.0% 100.0% 0.0% flops 858 x 64 x 853 2973773856768 0.0% 100.0% 0.0% flops 858 x 64 x 858 2991205122048 0.0% 100.0% 0.0% flops 22 x 22 x 64 3338610130944 0.0% 100.0% 0.0% flops 849 x 64 x 96 3856524115968 0.0% 100.0% 0.0% flops 849 x 96 x 64 3856524115968 0.0% 100.0% 0.0% flops 853 x 64 x 96 3874693840896 0.0% 100.0% 0.0% flops 853 x 96 x 64 3874693840896 0.0% 100.0% 0.0% flops 858 x 96 x 64 3897405997056 0.0% 100.0% 0.0% flops 858 x 64 x 96 3897405997056 0.0% 100.0% 0.0% flops 9 x 9 x 96 5501331634176 0.0% 100.0% 0.0% flops 9 x 22 x 96 7399681191936 0.0% 100.0% 0.0% flops 22 x 9 x 96 7413081679872 0.0% 100.0% 0.0% flops 96 x 96 x 849 8428802015232 0.0% 100.0% 0.0% flops 96 x 96 x 853 8468513685504 0.0% 100.0% 0.0% flops 96 x 96 x 858 8518153273344 0.0% 100.0% 0.0% flops 849 x 96 x 849 8786345066496 0.0% 100.0% 0.0% flops 849 x 96 x 853 8827741274112 0.0% 100.0% 0.0% flops 853 x 96 x 849 8827741274112 0.0% 100.0% 0.0% flops 853 x 96 x 853 8869332516864 0.0% 100.0% 0.0% flops 858 x 96 x 849 8879486533632 0.0% 100.0% 0.0% flops 849 x 96 x 858 8879486533632 0.0% 100.0% 0.0% flops 858 x 96 x 853 8921321570304 0.0% 100.0% 0.0% flops 853 x 96 x 858 8921321570304 0.0% 100.0% 0.0% flops 858 x 96 x 858 8973615366144 0.0% 100.0% 0.0% flops 22 x 22 x 96 10015830392832 0.0% 100.0% 0.0% flops 849 x 96 x 96 11569572347904 0.0% 100.0% 0.0% flops 853 x 96 x 96 11624081522688 0.0% 100.0% 0.0% flops 858 x 96 x 96 11692217991168 0.0% 100.0% 0.0% flops 9 x 32 x 9 21312216612864 0.0% 100.0% 0.0% flops 22 x 32 x 9 29317892972544 0.0% 100.0% 0.0% flops 9 x 32 x 22 29317892972544 0.0% 100.0% 0.0% flops 22 x 32 x 22 40107728764928 0.0% 100.0% 0.0% flops inhomo. stacks 0 0.0% 0.0% 0.0% flops total 383.054662E+12 0.0% 100.0% 0.0% flops max/rank 769.048094E+09 0.0% 100.0% 0.0% matmuls inhomo. stacks 0 0.0% 0.0% 0.0% matmuls total 11370092824 0.0% 100.0% 0.0% number of processed stacks 36472128 0.0% 100.0% 0.0% average stack size 0.0 311.7 0.0 marketing flops 780.451392E+12 ------------------------------------------------------------------------------- # multiplications 1445 max memory usage/rank 364.511232E+06 # max total images/rank 1 # max 3D layers 1 # MPI messages exchanged 38286720 MPI messages size (bytes): total size 22.066386E+12 min size 0.000000E+00 max size 5.889312E+06 average size 576.345688E+03 MPI breakdown and total messages size (bytes): size <= 128 274344 0 128 < size <= 8192 0 0 8192 < size <= 32768 1746712 57194053632 32768 < size <= 131072 13942784 856644648960 131072 < size <= 4194304 21501504 16367441085440 4194304 < size <= 16777216 821376 4784862003200 16777216 < size 0 0 ------------------------------------------------------------------------------- - - - DBCSR MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Bcast 68 12. MP_Allreduce 7415 50. MP_Alltoall 5329 496642. MP_ISend 138692 290073. MP_IRecv 138692 288852. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - MESSAGE PASSING PERFORMANCE - - - ------------------------------------------------------------------------------- ROUTINE CALLS AVE VOLUME [Bytes] MP_Group 4 MP_Bcast 4640 77325. MP_Allreduce 13232 2300. MP_Sync 1064 MP_Alltoall 2588 4826755. MP_SendRecv 126500 14304. MP_ISendRecv 69000 14304. MP_Wait 78200 MP_comm_split 40 MP_ISend 24680 99620. MP_IRecv 36480 68885. ------------------------------------------------------------------------------- ------------------------------------------------------------------------------- - - - T I M I N G - - - ------------------------------------------------------------------------------- SUBROUTINE CALLS ASD SELF TIME TOTAL TIME MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM CP2K 1 1.0 0.033 0.137 267.271 267.279 qs_mol_dyn_low 1 2.0 0.019 0.082 266.066 266.098 qs_forces 5 3.8 0.007 0.048 265.656 265.686 qs_energies 5 4.8 0.001 0.002 260.689 260.748 scf_env_do_scf 5 5.8 0.000 0.001 249.860 249.871 scf_env_do_scf_inner_loop 105 6.6 0.002 0.008 203.719 203.726 velocity_verlet 4 3.0 0.003 0.025 133.440 133.449 qs_scf_new_mos 105 7.6 0.001 0.001 121.857 122.748 qs_scf_loop_do_ot 105 8.6 0.001 0.001 121.857 122.747 ot_scf_mini 105 9.6 0.004 0.006 110.123 110.787 dbcsr_multiply_generic 1445 12.2 0.136 0.170 106.439 109.156 multiply_cannon 1445 13.2 0.188 0.227 86.920 93.667 multiply_cannon_loop 1445 14.2 0.343 0.534 83.746 89.638 mp_waitall_1 372490 16.1 52.623 67.175 52.623 67.175 rebuild_ks_matrix 110 8.4 0.001 0.001 51.013 51.610 qs_ks_build_kohn_sham_matrix 110 9.4 0.013 0.016 51.012 51.609 qs_ks_update_qs_env 112 7.6 0.001 0.002 46.966 47.506 multiply_cannon_metrocomm3 34680 15.2 0.127 0.254 10.157 47.373 multiply_cannon_multrec 34680 15.2 39.377 47.004 39.388 47.015 ot_mini 105 10.6 0.001 0.002 45.529 46.341 init_scf_loop 7 6.6 0.000 0.000 46.078 46.082 qs_ot_get_p 112 10.4 0.001 0.002 44.045 45.947 qs_rho_update_rho_low 110 7.6 0.001 0.001 42.010 42.343 calculate_rho_elec 110 8.6 0.047 0.049 42.010 42.343 multiply_cannon_metrocomm1 34680 15.2 0.151 0.285 30.102 40.383 prepare_preconditioner 7 7.6 0.000 0.000 38.863 38.997 make_preconditioner 7 8.6 0.000 0.000 38.863 38.997 make_full_inverse_cholesky 7 9.6 0.000 0.003 30.492 30.841 qs_ot_p2m_diag 40 11.0 0.022 0.029 29.797 29.830 qs_ot_get_derivative 55 11.6 0.001 0.002 28.434 29.162 cp_dbcsr_syevd 40 12.0 0.003 0.003 28.921 28.928 fft_wrap_pw1pw2 1425 12.5 0.020 0.023 23.270 23.716 cp_fm_syevd 40 13.0 0.000 0.001 22.684 22.816 grid_collocate_task_list 110 9.6 20.420 22.118 20.420 22.118 fft_wrap_pw1pw2_240 915 14.0 0.434 0.476 20.850 21.488 density_rs2pw 110 9.6 0.006 0.010 18.876 20.821 sum_up_and_integrate 60 10.3 0.001 0.002 20.697 20.742 integrate_v_rspace 60 11.3 0.002 0.003 20.664 20.710 fft3d_pb 915 15.0 4.972 5.373 18.722 19.395 apply_preconditioner_dbcsr 62 12.6 0.000 0.001 17.766 18.875 apply_single 62 13.6 0.000 0.000 17.766 18.875 qs_vxc_create 110 10.4 0.002 0.003 18.528 18.598 ot_new_cg_direction 55 11.6 0.001 0.004 16.907 16.908 cp_fm_redistribute_end 40 14.0 8.477 16.892 8.482 16.894 cp_fm_syevd_base 40 14.0 8.403 16.823 8.403 16.823 cp_fm_cholesky_invert 7 10.6 15.920 15.931 15.920 15.931 make_m2s 2890 13.2 0.098 0.119 14.483 15.765 make_images 2890 14.2 0.238 0.274 13.265 14.691 transfer_rs2pw 445 10.6 0.007 0.010 12.524 14.478 mp_alltoall_z22v 2340 16.7 12.524 13.689 12.524 13.689 cp_fm_cholesky_decompose 14 10.2 13.213 13.262 13.213 13.262 calculate_dm_sparse 110 9.5 0.001 0.001 12.351 13.193 xc_pw_derive 510 13.4 0.006 0.010 12.658 12.986 xc_vxc_pw_create 60 11.3 0.080 0.094 12.768 12.837 xc_rho_set_and_dset_create 110 12.4 0.139 0.172 12.505 12.583 mp_waitany 6270 13.5 10.346 12.173 10.346 12.173 make_images_data 2890 15.2 0.070 0.092 9.906 11.828 check_diag 80 13.5 10.241 10.493 11.565 11.694 qs_ot_get_derivative_taylor 37 12.8 0.001 0.002 10.098 10.559 grid_integrate_task_list 60 12.3 9.766 10.272 9.766 10.272 hybrid_alltoall_any 2983 16.1 0.102 1.312 7.377 9.831 potential_pw2rs 60 12.3 0.003 0.005 9.171 9.229 mp_sum_l 7231 12.6 4.065 8.626 4.065 8.626 init_scf_run 5 5.8 0.000 0.000 7.961 7.963 scf_env_initial_rho_setup 5 6.8 0.000 0.001 7.961 7.963 mp_irecv_dv 72697 16.1 3.239 7.741 3.239 7.741 multiply_cannon_metrocomm4 33235 15.2 0.117 0.246 3.063 7.542 make_full_single_inverse 7 9.6 0.001 0.001 7.383 7.503 transfer_pw2rs 245 13.2 0.003 0.004 7.086 7.126 cube_transpose_3 560 16.1 0.536 0.721 6.394 7.027 qs_ot_get_derivative_diag 18 12.0 0.001 0.001 6.487 6.765 mp_alltoall_d11v 1300 13.8 6.061 6.655 6.061 6.655 transfer_rs2pw_240 115 11.5 1.069 1.454 3.975 6.513 transfer_rs2pw_80 110 11.6 0.233 0.304 5.440 6.050 mp_allgather_i34 1445 14.2 1.861 5.999 1.861 5.999 make_images_sizes 2890 15.2 0.004 0.008 1.715 5.872 mp_alltoall_i44 2890 16.2 1.711 5.868 1.711 5.868 xc_pw_divergence 60 12.3 0.002 0.003 5.661 5.799 xc_exc_calc 50 11.5 0.019 0.019 5.757 5.768 wfi_extrapolate 5 7.8 0.000 0.001 5.639 5.639 qs_ot_get_orbitals 105 10.6 0.001 0.001 5.366 5.602 ------------------------------------------------------------------------------- ~ ~ ~ ~ DATA POINTS ~ ~ ~ ~ PlotPoint: name="601", plot="h2o_512_md", label="(16n/36r/1t)", y=267.279000, yerr=0.000000 PlotPoint: name="602", plot="h2o_512_md_mem", label="(16n/36r/1t)", y=344.200000, yerr=0.979796 ~ ~ ~ ~ END DATA POINTS ~ ~ ~ ~~~~~~ END RESULT ~~~~~~~~ ========= END RESULTS =========== CommitSHA: cebb65e3a93fe490e8f7435957a27a406ba398e1 Summary: empty Status: OK