=== This is the CP2K Performance-Test ===


Updating e86548823..e64b33155
Fast-forward
 CMakeLists.txt                                     |   4 +-
 README_cmake.md                                    | 177 +++++++++++----------
 cmake/FindBlis.cmake                               |   2 +-
 cmake/FindFftw.cmake                               |  40 +++--
 cmake/FindGenericBLAS.cmake                        |  23 ++-
 cmake/FindLibSPG.cmake                             |   2 +-
 cmake/FindLibVORI.cmake                            |   2 +-
 cmake/FindLibXC.cmake                              |  20 ++-
 cmake/FindLibXSMM.cmake                            |  48 +++---
 cmake/FindLibint2.cmake                            |  19 ++-
 cmake/FindMetis.cmake                              |   4 +
 cmake/FindOpenBLAS.cmake                           |  25 +--
 cmake/FindQuip.cmake                               |   4 +
 cmake/FindSCI.cmake                                |   6 +-
 cmake/cp2k_utils.cmake                             |  16 +-
 src/CMakeLists.txt                                 |   6 +-
 tools/docker/Dockerfile.test_cmake                 |   5 +-
 tools/docker/generate_dockerfiles.py               |   5 +-
 tools/docker/scripts/install_dbcsr.sh              |  24 +--
 tools/toolchain/scripts/stage2/install_openblas.sh |   4 +
 tools/toolchain/scripts/stage3/install_fftw.sh     |   4 +-
 tools/toolchain/scripts/stage3/install_libint.sh   |   5 +-
 tools/toolchain/scripts/stage3/install_libxc.sh    |   2 +
 tools/toolchain/scripts/stage4/install_cosma.sh    |   2 +
 tools/toolchain/scripts/stage4/install_libxsmm.sh  |   3 +
 .../toolchain/scripts/stage4/install_scalapack.sh  |   3 +
 tools/toolchain/scripts/stage5/install_elpa.sh     |   3 +-
 tools/toolchain/scripts/stage5/install_pexsi.sh    |   2 +
 tools/toolchain/scripts/stage5/install_ptscotch.sh |   3 +
 tools/toolchain/scripts/stage5/install_superlu.sh  |   3 +
 tools/toolchain/scripts/stage6/install_gsl.sh      |   4 +-
 tools/toolchain/scripts/stage6/install_plumed.sh   |   3 +
 tools/toolchain/scripts/stage6/install_quip.sh     |   3 +
 tools/toolchain/scripts/stage7/install_hdf5.sh     |   3 +-
 tools/toolchain/scripts/stage7/install_libtorch.sh |   2 +
 tools/toolchain/scripts/stage7/install_libvdwxc.sh |   4 +-
 tools/toolchain/scripts/stage7/install_libvori.sh  |   2 +
 tools/toolchain/scripts/stage7/install_spglib.sh   |   4 +-
 tools/toolchain/scripts/stage8/install_sirius.sh   |   4 +
 tools/toolchain/scripts/stage8/install_spfft.sh    |   4 +-
 tools/toolchain/scripts/stage8/install_spla.sh     |   4 +-
 41 files changed, 306 insertions(+), 197 deletions(-)
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: e64b331552e1588eec11f575b9e771b24f7bf608


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1, Cray-FFTW 3.3.8.10,
#              COSMA 2.6.2, ELPA 2022.11.001, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.1.0, LIBVORI 220621, LIBXSMM 1.17, PLUMED 2.8.1,
#              SIRIUS 7.3.2, SPGLIB 1.16.2
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed as default.
#        Replace or adapt the "module add" commands below if needed.
#
# Author: Matthias Krack (12.01.2023)
#
# Structure of this file:
#   - The bash part starts right after the backslash-terminated comment line
#     below. Every line of it except the final "return" ends with a
#     backslash, so bash reads it as a single command list. It refuses to run
#     unless the file is sourced, loads the modules, runs the toolchain
#     installer, sources the generated setup file and prints the make
#     commands to use. The final "return" stops bash before the make part.
#   - NOTE(review): the backslash-terminated comment line is presumably what
#     makes GNU make treat the whole bash part as one continued comment --
#     confirm against the GNU make manual before reformatting these lines,
#     and do not insert lines without a trailing backslash in between.
#   - Optional first argument when sourcing: the GCC version to load
#     (defaults to 9.3.0).
#   - NOTE(review): maxtasks is passed to the toolchain's -j option but is not
#     assigned anywhere in this section; presumably it is expected from the
#     caller's environment -- confirm.
#
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   cd tools/toolchain; \
   ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j${maxtasks} --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed; \
   cd ../..; \
   printf "Sourcing ${PWD}/tools/toolchain/install/setup ... "; \
   source ${PWD}/tools/toolchain/install/setup; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   return

# Set options
#
# DO_CHECKS and USE_ACC are yes/no switches tested with ifeq further down.
# Each USE_<PACKAGE> variable holds the package version; the version string is
# used to build the package directory name below INSTALL_PATH. The package
# sections are guarded by "ifneq ($(USE_<PACKAGE>),)", so a package is left
# out when its variable is empty or undefined (as for USE_QUIP, which is
# commented out here).
# Keep comments on their own lines: the values are stripped before use, but
# nothing else should be appended to these assignments.
DO_CHECKS      := no
USE_ACC        := yes
USE_COSMA      := 2.6.2
USE_ELPA       := 2022.11.001
USE_LIBINT     := 2.6.0
USE_LIBPEXSI   := 1.2.0
USE_LIBVORI    := 220621
USE_LIBXC      := 6.1.0
USE_LIBXSMM    := 1.17
USE_PLUMED     := 2.8.1
#USE_QUIP       := 0.9.10
USE_SIRIUS     := 7.3.2
USE_SPGLIB     := 1.16.2
# Only needed for SIRIUS
LIBVDWXC_VER   := 0.4.0
SPFFT_VER      := 1.0.6
SPLA_VER       := 1.5.4
HDF5_VER       := 1.12.0
# Only needed for LIBPEXSI
SCOTCH_VER     := 6.0.0
SUPERLU_VER    := 6.1.0

# LMAX is part of the libint directory name; MAX_CONTR is passed to the
# compilers as -D__MAX_CONTR.
LMAX           := 5
MAX_CONTR      := 4

# NOTE(review): GPUVER and OFFLOAD_TARGET are not referenced again in this
# arch file; presumably they are read by the including Makefile -- confirm.
GPUVER         := P100
OFFLOAD_TARGET := cuda

# Compiler, linker and archiver commands. The Fortran driver (ftn) is also
# used for linking; nvcc compiles the offload (CUDA) sources.
CC             := cc
CXX            := CC
OFFLOAD_CC     := nvcc
FC             := ftn
LD             := ftn
AR             := ar -r

# cc, CC, and ftn already include the proper -march flag
# CFLAGS is the common base: include paths and DFLAGS are appended to it
# below, and CXXFLAGS/FCFLAGS are derived from it at the end of the file.
CFLAGS         := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g

# Preprocessor defines that are always set; the optional package sections
# below append their own -D__<PACKAGE> flags.
DFLAGS         := -D__parallel
DFLAGS         += -D__SCALAPACK
DFLAGS         += -D__FFTW3
DFLAGS         += -D__MAX_CONTR=$(strip $(MAX_CONTR))

# Root of the toolchain installation; all package include and library paths
# below are built relative to it. It is derived from $(PWD), the same
# location the bash part of this file sources the toolchain setup from.
INSTALL_PATH   := $(PWD)/tools/toolchain/install

# Extra checks, enabled with DO_CHECKS=yes (e.g. on the make command line).
ifeq ($(DO_CHECKS), yes)
   DFLAGS         += -D__CHECK_DIAG
endif

# Accelerator build: define the DBCSR and CUDA offload flags, but keep the
# PW offload switched off via -D__NO_OFFLOAD_PW (see the comment below).
ifeq ($(USE_ACC), yes)
   DFLAGS         += -D__DBCSR_ACC
   DFLAGS         += -D__OFFLOAD_CUDA
# Possibly no performance gain with PW_CUDA currently
   DFLAGS         += -D__NO_OFFLOAD_PW
endif

# PLUMED: defines __PLUMED2 and links the static libplumed.a.
# Enabling PLUMED also sets USE_GSL, which activates the GSL section near the
# end of this file.
ifneq ($(USE_PLUMED),)
   USE_PLUMED     := $(strip $(USE_PLUMED))
   PLUMED_LIB     := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   DFLAGS         += -D__PLUMED2
   USE_GSL        := 2.7
   LIBS           += $(PLUMED_LIB)/libplumed.a
endif

# ELPA: adds the ELPA include directories (headers and modules), defines
# __ELPA and links the static libelpa.a.
# TARGET is fixed to nvidia here, so the paths always point to the nvidia
# subdirectory of the ELPA installation and -D__ELPA_NVIDIA_GPU is always
# added when ELPA is enabled.
ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
   TARGET         := nvidia
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   DFLAGS         += -D__ELPA
   ifeq ($(TARGET), nvidia)
      DFLAGS         += -D__ELPA_NVIDIA_GPU
   endif
   LIBS           += $(ELPA_LIB)/libelpa.a
endif

# QUIP: defines __QUIP and links libquip_core, libatoms and the FoX
# libraries. USE_QUIP is commented out in the options section, so this
# section is skipped unless USE_QUIP is set by other means.
ifneq ($(USE_QUIP),)
   USE_QUIP       := $(strip $(USE_QUIP))
   QUIP_INC       := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   QUIP_LIB       := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   CFLAGS         += -I$(QUIP_INC)
   DFLAGS         += -D__QUIP
   LIBS           += $(QUIP_LIB)/libquip_core.a
   LIBS           += $(QUIP_LIB)/libatoms.a
   LIBS           += $(QUIP_LIB)/libFoX_sax.a
   LIBS           += $(QUIP_LIB)/libFoX_common.a
   LIBS           += $(QUIP_LIB)/libFoX_utils.a
   LIBS           += $(QUIP_LIB)/libFoX_fsys.a
endif

# LIBPEXSI: defines __LIBPEXSI and links libpexsi together with SuperLU_DIST
# and the (PT-)Scotch libraries, whose versions are given by SUPERLU_VER and
# SCOTCH_VER in the options section.
# NOTE(review): all libraries are static archives, so the order of the LIBS
# entries likely matters at link time -- keep it unless verified.
ifneq ($(USE_LIBPEXSI),)
   USE_LIBPEXSI   := $(strip $(USE_LIBPEXSI))
   SCOTCH_VER     := $(strip $(SCOTCH_VER))
   SUPERLU_VER    := $(strip $(SUPERLU_VER))
   LIBPEXSI_INC   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   LIBPEXSI_LIB   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
   SCOTCH_INC     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   SCOTCH_LIB     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib
   SUPERLU_INC    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
   SUPERLU_LIB    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib
   CFLAGS         += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC)
   DFLAGS         += -D__LIBPEXSI
   LIBS           += $(LIBPEXSI_LIB)/libpexsi.a
   LIBS           += $(SUPERLU_LIB)/libsuperlu_dist.a
   LIBS           += $(SCOTCH_LIB)/libptscotchparmetis.a
   LIBS           += $(SCOTCH_LIB)/libptscotch.a
   LIBS           += $(SCOTCH_LIB)/libptscotcherr.a
   LIBS           += $(SCOTCH_LIB)/libscotchmetis.a
   LIBS           += $(SCOTCH_LIB)/libscotch.a
endif

# LIBVORI: defines __LIBVORI and links the static libvori.a (no include
# directory is added for this package).
ifneq ($(USE_LIBVORI),)
   USE_LIBVORI    := $(strip $(USE_LIBVORI))
   LIBVORI_LIB    := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   DFLAGS         += -D__LIBVORI
   LIBS           += $(LIBVORI_LIB)/libvori.a
endif

# LIBXC: adds the include directory, defines __LIBXC and links libxcf03.a
# followed by libxc.a.
ifneq ($(USE_LIBXC),)
   USE_LIBXC      := $(strip $(USE_LIBXC))
   LIBXC_INC      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   LIBXC_LIB      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   CFLAGS         += -I$(LIBXC_INC)
   DFLAGS         += -D__LIBXC
   LIBS           += $(LIBXC_LIB)/libxcf03.a
   LIBS           += $(LIBXC_LIB)/libxc.a
endif

# LIBINT: adds the include directory, defines __LIBINT and links libint2.a.
# The installation directory name encodes both the libint version and LMAX
# (libint-v<version>-cp2k-lmax-<LMAX>).
ifneq ($(USE_LIBINT),)
   USE_LIBINT     := $(strip $(USE_LIBINT))
   LMAX           := $(strip $(LMAX))
   LIBINT_INC     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   LIBINT_LIB     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   CFLAGS         += -I$(LIBINT_INC)
   DFLAGS         += -D__LIBINT
   LIBS           += $(LIBINT_LIB)/libint2.a
endif

# SPGLIB: adds the include directory, defines __SPGLIB and links the static
# libsymspg.a.
ifneq ($(USE_SPGLIB),)
   USE_SPGLIB     := $(strip $(USE_SPGLIB))
   SPGLIB_INC     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   SPGLIB_LIB     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   CFLAGS         += -I$(SPGLIB_INC)
   DFLAGS         += -D__SPGLIB
   LIBS           += $(SPGLIB_LIB)/libsymspg.a
endif

# LIBXSMM: adds the include directory, defines __LIBXSMM and links
# libxsmmf.a followed by libxsmm.a.
ifneq ($(USE_LIBXSMM),)
   USE_LIBXSMM    := $(strip $(USE_LIBXSMM))
   LIBXSMM_INC    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   LIBXSMM_LIB    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   CFLAGS         += -I$(LIBXSMM_INC)
   DFLAGS         += -D__LIBXSMM
   LIBS           += $(LIBXSMM_LIB)/libxsmmf.a
   LIBS           += $(LIBXSMM_LIB)/libxsmm.a
endif

# SIRIUS: enabling it also configures HDF5, libvdwxc, SpFFT and SpLA, whose
# versions come from the "Only needed for SIRIUS" variables in the options
# section. All five __HDF5/__LIBVDWXC/__SPFFT/__SPLA/__SIRIUS defines are set
# together.
# With USE_ACC=yes the CUDA variants are selected (lib/cuda and include/cuda
# subdirectories for SpFFT, SpLA and SIRIUS) and -D__OFFLOAD_GEMM is added;
# otherwise the plain lib and include directories are used.
# NOTE(review): the libraries are static archives, so the order of the LIBS
# entries likely matters at link time -- keep it unless verified.
ifneq ($(USE_SIRIUS),)
   USE_SIRIUS     := $(strip $(USE_SIRIUS))
   HDF5_VER       := $(strip $(HDF5_VER))
   HDF5_LIB       := $(INSTALL_PATH)/hdf5-$(HDF5_VER)/lib
   LIBVDWXC_VER   := $(strip $(LIBVDWXC_VER))
   LIBVDWXC_INC   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SPFFT_VER      := $(strip $(SPFFT_VER))
   SPFFT_INC      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
   SPLA_VER       := $(strip $(SPLA_VER))
   SPLA_INC       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla
   ifeq ($(USE_ACC), yes)
      DFLAGS         += -D__OFFLOAD_GEMM
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib/cuda
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib/cuda
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/cuda
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib/cuda
   else
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
   endif
   CFLAGS         += -I$(LIBVDWXC_INC)
   CFLAGS         += -I$(SPFFT_INC)
   CFLAGS         += -I$(SPLA_INC)
   CFLAGS         += -I$(SIRIUS_INC)
   DFLAGS         += -D__HDF5
   DFLAGS         += -D__LIBVDWXC
   DFLAGS         += -D__SPFFT
   DFLAGS         += -D__SPLA
   DFLAGS         += -D__SIRIUS
   LIBS           += $(SIRIUS_LIB)/libsirius.a
   LIBS           += $(SPLA_LIB)/libspla.a
   LIBS           += $(SPFFT_LIB)/libspfft.a
   LIBS           += $(LIBVDWXC_LIB)/libvdwxc.a
   LIBS           += $(HDF5_LIB)/libhdf5.a
endif

# COSMA: adds the include directory, defines __COSMA and links the COSMA,
# COSTA and Tiled-MM static libraries.
# With USE_ACC=yes the version string gets a "-cuda" suffix, so the paths
# point to COSMA-<version>-cuda instead of COSMA-<version>.
ifneq ($(USE_COSMA),)
   USE_COSMA      := $(strip $(USE_COSMA))
   ifeq ($(USE_ACC), yes)
      USE_COSMA      := $(USE_COSMA)-cuda
   endif
   COSMA_INC      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   COSMA_LIB      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   CFLAGS         += -I$(COSMA_INC)
   DFLAGS         += -D__COSMA
   LIBS           += $(COSMA_LIB)/libcosma_prefixed_pxgemm.a
   LIBS           += $(COSMA_LIB)/libcosma.a
   LIBS           += $(COSMA_LIB)/libcosta_prefixed_scalapack.a
   LIBS           += $(COSMA_LIB)/libcosta.a
   LIBS           += $(COSMA_LIB)/libTiled-MM.a
endif

# GSL: adds the include directory, defines __GSL and links libgsl.a.
# USE_GSL is not part of the options section; in this file it is only set
# inside the PLUMED section, so GSL follows PLUMED unless USE_GSL is set by
# other means.
ifneq ($(USE_GSL),)
   USE_GSL        := $(strip $(USE_GSL))
   GSL_INC        := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   GSL_LIB        := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   CFLAGS         += -I$(GSL_INC)
   DFLAGS         += -D__GSL
   LIBS           += $(GSL_LIB)/libgsl.a
endif

# Final flag assembly. This must come after all package sections because the
# assignments below use ":=" and therefore capture the values of CFLAGS and
# DFLAGS as accumulated up to this point.
CFLAGS         += $(DFLAGS)

CXXFLAGS       := $(CFLAGS) -std=c++11

# Flags for the offload compiler: only the defines are passed on, not the
# host CFLAGS.
# NOTE(review): -arch sm_60 is hard-coded here; presumably it has to match
# GPUVER (P100) -- confirm when changing the GPU target.
OFFLOAD_FLAGS  := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# Fortran flags start from the C flags. -fallow-argument-mismatch is added
# only when the major version printed by "gcc -dumpversion" is greater
# than 9.
FCFLAGS        := $(CFLAGS)
ifeq ($(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes), yes)
   FCFLAGS        += -fallow-argument-mismatch
endif
FCFLAGS        += -fbacktrace
FCFLAGS        += -ffree-form
FCFLAGS        += -ffree-line-length-none
FCFLAGS        += -fno-omit-frame-pointer
FCFLAGS        += -std=f2008

# Linker flags reuse the Fortran flags. When CUDA_HOME is set, its lib64
# directory is added both as a library search path and as an rpath.
ifneq ($(CUDA_HOME),)
   CUDA_LIB       := $(CUDA_HOME)/lib64
   LDFLAGS        := $(FCFLAGS) -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
else
   LDFLAGS        := $(FCFLAGS)
endif

# System libraries appended after all package libraries.
# NOTE(review): the CUDA libraries are linked unconditionally, i.e. also when
# USE_ACC is not "yes" -- confirm this is intended for this GPU-only arch file.
LIBS           += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt
LIBS           += -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/01
 job id: 44305821
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/02
 job id: 44305824
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/03
 job id: 44305825
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/04
 job id: 44305826
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/05
 job id: 44305828
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/06
 job id: 44305829
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/07
 job id: 44305830
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/08
 job id: 44305831
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/09
 job id: 44305832
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/10
 job id: 44305833
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/11
 job id: 44305834
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/12
 job id: 44305835
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/13
 job id: 44305837
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/14
 job id: 44305840
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/15
 job id: 44305842
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/16
 job id: 44305843
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/17
 job id: 44305844
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/18
 job id: 44305845
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/19
 job id: 44305846
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/20
 job id: 44305847
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/21
 job id: 44305851
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/22
 job id: 44305852
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/23
 job id: 44305854
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/24
 job id: 44305855
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/25
 job id: 44305857
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/26
 job id: 44305859
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          344                      9.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.022    0.036  134.988  134.989
 farming_run                          1  2.0  134.419  134.420  134.960  134.965
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.458106E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              228                1113141.
 MP_Allreduce          485                2282278.
 MP_Sync                27
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split           8
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.045  116.103  116.117
 qs_energies                          1  2.0    0.000    0.000  115.887  115.890
 mp2_main                             1  3.0    0.000    0.000  113.405  113.408
 mp2_gpw_main                         1  4.0    0.020    0.026  112.035  112.038
 mp2_ri_gpw_compute_in                1  5.0    0.171    0.173   93.287   93.774
 mp2_ri_gpw_compute_in_loop           1  6.0    0.004    0.005   55.429   55.915
 mp2_eri_3c_integrate_gpw           272  7.0    0.153    0.170   41.746   47.207
 get_2c_integrals                     1  6.0    0.000    0.001   37.100   37.685
 integrate_v_rspace                 273  8.0    0.436    0.448   25.105   30.273
 pw_transfer                       6555 10.6    0.373    0.379   27.361   27.865
 grid_integrate_task_list           273  9.0   20.913   26.576   20.913   26.576
 fft_wrap_pw1pw2                   5465 11.4    0.044    0.048   26.054   26.557
 fft_wrap_pw1pw2_100               2178 12.4    1.165    1.240   23.577   24.077
 compute_2c_integrals                 1  7.0    0.004    0.004   19.345   19.346
 compute_2c_integrals_loop_lm         1  8.0    0.003    0.004   18.818   19.028
 mp2_eri_2c_integrate_gpw             1  9.0    2.384    2.423   18.815   19.024
 rpa_ri_compute_en                    1  5.0    0.001    0.012   18.640   18.777
 cp_fm_cholesky_decompose            12  8.2   17.726   18.296   17.726   18.296
 cholesky_decomp                      1  7.0    0.000    0.000   16.591   17.159
 fft3d_s                           5443 13.4   16.125   16.616   16.147   16.638
 ao_to_mo_and_store_B_mult_1        272  7.0   10.853   15.570   10.853   15.570
 calculate_wavefunction             272  8.0    5.434    5.569   12.564   13.197
 rpa_num_int                          1  6.0    0.001    0.009   10.490   10.500
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.451   10.473
 calc_mat_Q                           8  8.0    0.000    0.000    9.283    9.382
 contract_S_to_Q                      8  9.0    0.000    0.000    8.708    8.805
 calc_potential_gpw                 544  9.5    0.005    0.006    8.248    8.589
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.002    8.188    8.403
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.302    8.371
 parallel_gemm_fm_cosma              14 10.1    8.302    8.371    8.302    8.371
 potential_pw2rs                    545 10.0    0.107    0.108    7.672    8.306
 collocate_single_gaussian          272 10.0    0.040    0.042    7.427    7.659
 create_integ_mat                     1  6.0    0.013    0.028    7.560    7.569
 array2fm                             1  7.0    0.000    0.000    6.735    7.083
 pw_scatter_s                      2720 13.7    4.449    4.660    4.449    4.660
 pw_gather_s                       2722 13.2    3.907    4.227    3.907    4.227
 array2fm_buffer_send                 1  8.0    2.988    3.155    2.988    3.155
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=112.038117, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2730.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          344                     10.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.029    0.038  397.824  397.825
 farming_run                          1  2.0  396.798  396.802  397.780  397.781
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827128576       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788821       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.226256E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              703                 408373.
 MP_Allreduce         1821                  23730.
 MP_Sync                38
 MP_Alltoall            77                2368424.
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.050  209.862  209.863
 qs_energies                          1  2.0    0.000    0.000  209.571  209.579
 scf_env_do_scf                       1  3.0    0.000    0.000  106.774  106.774
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  105.856  105.864
 rebuild_ks_matrix                    4  6.0    0.000    0.000  105.855  105.862
 qs_ks_build_kohn_sham_matrix         4  7.0    0.057    0.065  105.855  105.862
 hfx_ks_matrix                        4  8.0    0.001    0.001  105.453  105.458
 integrate_four_center                4  9.0    0.145    0.462  105.452  105.457
 mp2_main                             1  3.0    0.000    0.000  102.486  102.494
 mp2_gpw_main                         1  4.0    0.032    0.048  101.289  101.299
 integrate_four_center_main           4 10.0    0.118    0.506   96.715   99.711
 integrate_four_center_bin          264 11.0   96.596   99.695   96.596   99.695
 init_scf_loop                        1  4.0    0.000    0.000   92.469   92.469
 mp2_ri_gpw_compute_in                1  5.0    0.064    0.064   74.824   75.865
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.002   54.397   55.438
 mp2_eri_3c_integrate_gpw            91  7.0    0.145    0.164   42.180   47.137
 integrate_v_rspace                  95  8.0    0.399    0.572   28.552   33.341
 pw_transfer                       2240 10.6    0.143    0.163   29.947   30.350
 fft_wrap_pw1pw2                   1868 11.4    0.018    0.020   28.951   29.355
 grid_integrate_task_list            95  9.0   23.826   28.838   23.826   28.838
 ao_to_mo_and_store_B_mult_1         91  7.0   10.534   28.732   10.534   28.732
 mp2_ri_gpw_compute_en                1  5.0    0.054    0.063   26.310   28.001
 fft_wrap_pw1pw2_100                730 12.4    1.286    1.438   26.656   27.113
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.824    1.886   24.623   24.633
 get_2c_integrals                     1  6.0    0.000    0.000   20.340   20.363
 compute_2c_integrals                 1  7.0    0.002    0.003   19.322   19.325
 compute_2c_integrals_loop_lm         1  8.0    0.001    0.002   18.909   19.189
 mp2_eri_2c_integrate_gpw             1  9.0    1.734    1.851   18.908   19.188
 fft3d_s                           1823 13.4   18.431   18.752   18.445   18.766
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.000   14.302   14.302
 calculate_wavefunction              91  8.0    2.033    2.064    9.760    9.979
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.560    0.587    8.733    9.244
 potential_pw2rs                    186 10.0    0.034    0.035    8.634    9.238
 local_gemm                         172  8.0    8.173    8.672    8.173    8.672
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.001    8.266    8.522
 calc_potential_gpw                 182  9.5    0.002    0.002    7.952    8.170
 collocate_single_gaussian           91 10.0    0.017    0.024    7.900    8.087
 mp2_ri_gpw_compute_en_comm          22  7.0    0.542    0.560    7.678    8.070
 mp2_ri_gpw_compute_en_ener         172  7.0    6.344    6.449    6.344    6.449
 mp_sendrecv_dm3                   2068  8.0    5.603    5.974    5.603    5.974
 pw_gather_s                        912 13.2    4.902    5.470    4.902    5.470
 mp_sync                             38 10.4    3.401    5.335    3.401    5.335
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=101.283118, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1513.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             452.157440E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62385.
 MP_Allreduce        10249                    271.
 MP_Sync               580
 MP_Alltoall          2083                 589622.
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.021    0.044   53.705   53.711
 qs_mol_dyn_low                       1  2.0    0.003    0.003   53.469   53.477
 qs_forces                           11  3.9    0.002    0.002   53.406   53.407
 qs_energies                         11  4.9    0.002    0.004   51.815   51.839
 scf_env_do_scf                      11  5.9    0.000    0.001   45.360   45.360
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   43.188   43.189
 dbcsr_multiply_generic            2286 12.5    0.094    0.098   33.877   34.316
 qs_scf_new_mos                     108  7.5    0.000    0.001   32.276   32.589
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   32.276   32.588
 ot_scf_mini                        108  9.5    0.002    0.002   30.624   30.828
 multiply_cannon                   2286 13.5    0.187    0.196   26.225   27.644
 multiply_cannon_loop              2286 14.5    1.487    1.560   25.412   26.773
 velocity_verlet                     10  3.0    0.001    0.002   26.226   26.227
 ot_mini                            108 10.5    0.001    0.001   19.312   19.585
 qs_ot_get_derivative               108 11.5    0.001    0.001   16.385   16.571
 mp_waitall_1                    245248 16.5    8.491   14.725    8.491   14.725
 multiply_cannon_metrocomm3       54864 15.5    0.068    0.074    5.827   13.090
 multiply_cannon_multrec          54864 15.5    4.245    6.486    7.906   11.515
 rebuild_ks_matrix                  119  8.3    0.000    0.000    8.300    8.436
 qs_ks_build_kohn_sham_matrix       119  9.3    0.010    0.011    8.299    8.436
 multiply_cannon_sync_h2d         54864 15.5    5.889    7.478    5.889    7.478
 qs_ks_update_qs_env                119  7.6    0.001    0.001    7.338    7.468
 mp_sum_l                          7207 12.9    5.366    7.090    5.366    7.090
 qs_ot_get_p                        119 10.4    0.001    0.001    6.671    7.022
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    5.532    5.900
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    5.489    5.610
 init_scf_run                        11  5.9    0.000    0.001    5.166    5.166
 scf_env_initial_rho_setup           11  6.9    0.009    0.012    5.166    5.166
 dbcsr_mm_accdrv_process          76910 16.1    1.166    1.836    3.582    5.079
 sum_up_and_integrate               119 10.3    0.012    0.015    4.836    4.842
 integrate_v_rspace                 119 11.3    0.002    0.002    4.823    4.830
 qs_rho_update_rho_low              119  7.7    0.000    0.001    4.636    4.734
 calculate_rho_elec                 119  8.7    0.011    0.017    4.636    4.733
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    3.732    3.844
 calculate_dm_sparse                119  9.5    0.000    0.001    3.225    3.351
 multiply_cannon_metrocomm1       54864 15.5    0.053    0.058    1.817    3.244
 rs_pw_transfer                     974 11.9    0.011    0.012    3.058    3.189
 jit_kernel_multiply                 13 15.8    2.354    3.159    2.354    3.159
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.871    3.117
 apply_single                       119 13.6    0.000    0.000    2.871    3.117
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.944    2.954
 cp_dbcsr_syevd                      50 12.0    0.002    0.003    2.902    2.902
 density_rs2pw                      119  9.7    0.004    0.004    2.613    2.732
 ot_diis_step                       108 11.5    0.006    0.006    2.664    2.665
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.624    2.625
 cp_fm_redistribute_end              50 14.0    2.381    2.594    2.387    2.595
 cp_fm_diag_elpa_base                50 14.0    0.207    2.493    0.208    2.503
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.432    2.496
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.388    2.391
 pw_transfer                       1439 11.6    0.053    0.058    2.190    2.272
 acc_transpose_blocks             54864 15.5    0.231    0.254    1.780    2.234
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.007    2.114    2.198
 potential_pw2rs                    119 12.3    0.004    0.004    2.168    2.192
 wfi_extrapolate                     11  7.9    0.001    0.001    2.153    2.153
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.111    2.151
 init_scf_loop                       11  6.9    0.000    0.000    2.150    2.150
 grid_integrate_task_list           119 12.3    2.020    2.132    2.020    2.132
 make_m2s                          4572 13.5    0.054    0.057    1.978    2.033
 mp_sum_d                          4125 12.0    1.354    1.990    1.354    1.990
 fft3d_ps                          1201 14.6    0.369    0.475    1.885    1.970
 make_images                       4572 14.5    0.132    0.138    1.895    1.949
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.750    1.778
 fft_wrap_pw1pw2_140                487 13.2    0.081    0.094    1.603    1.689
 mp_alltoall_d11v                  2130 13.8    1.419    1.595    1.419    1.595
 grid_collocate_task_list           119  9.7    1.291    1.344    1.291    1.344
 mp_waitany                       12084 13.8    1.206    1.338    1.206    1.338
 dbcsr_dot_sd                      1205 11.9    0.051    0.062    0.699    1.130
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=53.711000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=431.363636, yerr=0.881396
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             487.944192E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62664.
 MP_Allreduce        10226                    305.
 MP_Sync               104
 MP_Alltoall          2060                 100898.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.031   41.098   41.098
 qs_mol_dyn_low                       1  2.0    0.003    0.003   40.879   40.920
 qs_forces                           11  3.9    0.002    0.002   40.791   40.791
 qs_energies                         11  4.9    0.001    0.002   39.067   39.070
 scf_env_do_scf                      11  5.9    0.000    0.001   33.253   33.253
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   30.532   30.533
 dbcsr_multiply_generic            2286 12.5    0.100    0.103   22.413   22.791
 qs_scf_new_mos                     108  7.5    0.001    0.001   20.786   21.036
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   20.785   21.036
 ot_scf_mini                        108  9.5    0.002    0.003   19.873   20.047
 velocity_verlet                     10  3.0    0.001    0.002   19.214   19.215
 multiply_cannon                   2286 13.5    0.209    0.219   17.080   18.742
 multiply_cannon_loop              2286 14.5    0.900    0.972   15.880   17.447
 ot_mini                            108 10.5    0.001    0.001   12.230   12.467
 mp_waitall_1                    200699 16.5    5.974   11.031    5.974   11.031
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.726    9.905
 multiply_cannon_metrocomm3       27432 15.5    0.068    0.070    4.243    9.514
 multiply_cannon_multrec          27432 15.5    1.963    4.541    6.517    9.461
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.511    7.655
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    7.511    7.655
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.651    6.780
 dbcsr_mm_accdrv_process          47894 16.0    3.365    5.629    4.484    6.321
 qs_ot_get_p                        119 10.4    0.001    0.001    4.688    4.912
 init_scf_run                        11  5.9    0.000    0.001    4.586    4.587
 scf_env_initial_rho_setup           11  6.9    0.002    0.002    4.586    4.586
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.710    4.550
 sum_up_and_integrate               119 10.3    0.024    0.026    4.473    4.479
 integrate_v_rspace                 119 11.3    0.002    0.002    4.449    4.455
 mp_sum_l                          7207 12.9    2.194    4.174    2.194    4.174
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.121    4.164
 calculate_rho_elec                 119  8.7    0.021    0.024    4.121    4.164
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.066    4.140
 apply_single                       119 13.6    0.000    0.000    3.065    4.140
 rs_pw_transfer                     974 11.9    0.010    0.011    3.005    3.409
 qs_ot_p2m_diag                      50 11.0    0.009    0.013    3.124    3.146
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.956    2.958
 make_m2s                          4572 13.5    0.052    0.054    2.692    2.945
 density_rs2pw                      119  9.7    0.004    0.004    2.450    2.881
 make_images                       4572 14.5    0.198    0.235    2.604    2.853
 multiply_cannon_sync_h2d         27432 15.5    2.147    2.819    2.147    2.819
 calculate_dm_sparse                119  9.5    0.000    0.000    2.735    2.811
 jit_kernel_multiply                 11 16.1    1.067    2.800    1.067    2.800
 init_scf_loop                       11  6.9    0.000    0.000    2.692    2.692
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.663    2.663
 ot_diis_step                       108 11.5    0.011    0.011    2.441    2.442
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.300    2.395
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.256    2.257
 cp_fm_redistribute_end              50 14.0    1.860    2.220    1.864    2.222
 potential_pw2rs                    119 12.3    0.006    0.006    2.199    2.207
 pw_transfer                       1439 11.6    0.065    0.073    2.127    2.158
 cp_fm_diag_elpa_base                50 14.0    0.342    2.099    0.356    2.150
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.035    2.069
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.939    1.947
 grid_integrate_task_list           119 12.3    1.838    1.933    1.838    1.933
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.895    1.897
 prepare_preconditioner              11  7.9    0.000    0.000    1.748    1.775
 make_preconditioner                 11  8.9    0.000    0.000    1.748    1.775
 fft3d_ps                          1201 14.6    0.512    0.564    1.734    1.763
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.643    1.701
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.646    1.686
 make_images_data                  4572 15.5    0.045    0.051    1.179    1.634
 wfi_extrapolate                     11  7.9    0.001    0.001    1.570    1.570
 hybrid_alltoall_any               4725 16.4    0.051    0.111    1.023    1.532
 fft_wrap_pw1pw2_140                487 13.2    0.079    0.086    1.468    1.503
 acc_transpose_blocks             27432 15.5    0.110    0.115    1.189    1.470
 mp_allgather_i34                  2286 14.5    0.643    1.417    0.643    1.417
 mp_alltoall_d11v                  2130 13.8    1.264    1.409    1.264    1.409
 grid_collocate_task_list           119  9.7    1.231    1.367    1.231    1.367
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.223    1.264
 mp_sum_d                          4125 12.0    0.620    1.064    0.620    1.064
 make_images_sizes                 4572 15.5    0.005    0.005    0.705    0.957
 mp_alltoall_i44                   4572 16.5    0.701    0.952    0.701    0.952
 rs_pw_transfer_RS2PW_140           130 11.5    0.140    0.147    0.527    0.941
 mp_waitany                        5720 13.7    0.512    0.938    0.512    0.938
 qs_energies_init_hamiltonians       11  5.9    0.000    0.002    0.937    0.937
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.922    0.937
 mp_alltoall_z22v                  1201 16.6    0.761    0.870    0.761    0.870
 acc_transpose_blocks_kernels     27432 16.5    0.183    0.270    0.649    0.855
 rs_pw_transfer_PW2RS_50            119 14.3    0.588    0.606    0.825    0.844
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=41.098000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=465.272727, yerr=1.542778
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             521.838592E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62660.
 MP_Allreduce        10225                    303.
 MP_Sync               104
 MP_Alltoall          1821                1993547.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_comm_split          50
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.053    0.106   35.223   35.225
 qs_mol_dyn_low                       1  2.0    0.003    0.004   34.813   34.820
 qs_forces                           11  3.9    0.005    0.015   34.699   34.701
 qs_energies                         11  4.9    0.003    0.016   33.039   33.044
 scf_env_do_scf                      11  5.9    0.004    0.011   27.260   27.261
 scf_env_do_scf_inner_loop          108  6.5    0.005    0.018   24.443   24.444
 dbcsr_multiply_generic            2286 12.5    0.108    0.122   17.745   17.859
 velocity_verlet                     10  3.0    0.005    0.013   16.245   16.248
 qs_scf_new_mos                     108  7.5    0.001    0.001   15.739   15.758
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   15.738   15.757
 ot_scf_mini                        108  9.5    0.003    0.003   14.980   15.000
 multiply_cannon                   2286 13.5    0.197    0.201   14.095   14.908
 multiply_cannon_loop              2286 14.5    0.638    0.671   13.218   14.174
 ot_mini                            108 10.5    0.001    0.001    9.223    9.252
 multiply_cannon_multrec          18288 15.5    1.946    2.904    7.619    7.880
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.680    7.700
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.697    6.719
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.015    6.697    6.718
 dbcsr_mm_accdrv_process          38222 16.0    4.460    6.185    5.590    6.438
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.919    5.939
 init_scf_run                        11  5.9    0.000    0.001    4.438    4.438
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    4.437    4.438
 mp_waitall_1                    158411 16.6    2.953    4.323    2.953    4.323
 sum_up_and_integrate               119 10.3    0.029    0.030    4.243    4.248
 integrate_v_rspace                 119 11.3    0.002    0.003    4.214    4.222
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.733    3.742
 calculate_rho_elec                 119  8.7    0.030    0.032    3.732    3.742
 qs_ot_get_p                        119 10.4    0.001    0.001    3.527    3.554
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.915    3.538
 calculate_first_density_matrix       1  7.0    0.001    0.003    3.102    3.103
 rs_pw_transfer                     974 11.9    0.009    0.010    2.676    2.956
 multiply_cannon_metrocomm3       18288 15.5    0.045    0.046    1.548    2.808
 init_scf_loop                       11  6.9    0.001    0.005    2.785    2.786
 jit_kernel_multiply                 10 15.8    1.080    2.673    1.080    2.673
 calculate_dm_sparse                119  9.5    0.000    0.000    2.554    2.565
 density_rs2pw                      119  9.7    0.004    0.004    2.224    2.486
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.070    2.396
 apply_single                       119 13.6    0.000    0.000    2.070    2.396
 qs_ot_p2m_diag                      50 11.0    0.012    0.013    2.352    2.358
 make_m2s                          4572 13.5    0.045    0.046    1.980    2.149
 pw_transfer                       1439 11.6    0.066    0.071    2.077    2.091
 make_images                       4572 14.5    0.190    0.202    1.895    2.064
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.061    2.062
 potential_pw2rs                    119 12.3    0.007    0.008    2.012    2.024
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.983    1.999
 prepare_preconditioner              11  7.9    0.000    0.001    1.951    1.953
 make_preconditioner                 11  8.9    0.001    0.002    1.951    1.953
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.896    1.901
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.799    1.887
 grid_integrate_task_list           119 12.3    1.798    1.879    1.798    1.879
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.875    1.878
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.788    1.790
 cp_fm_redistribute_end              50 14.0    1.329    1.758    1.330    1.759
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.711    1.722
 cp_fm_diag_elpa_base                50 14.0    0.411    1.670    0.426    1.714
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.691    1.699
 fft3d_ps                          1201 14.6    0.523    0.539    1.663    1.679
 mp_sum_l                          7207 12.9    1.279    1.666    1.279    1.666
 ot_diis_step                       108 11.5    0.011    0.011    1.513    1.513
 multiply_cannon_sync_h2d         18288 15.5    1.344    1.512    1.344    1.512
 fft_wrap_pw1pw2_140                487 13.2    0.089    0.093    1.484    1.499
 grid_collocate_task_list           119  9.7    1.210    1.367    1.210    1.367
 acc_transpose_blocks             18288 15.5    0.077    0.079    1.299    1.337
 wfi_extrapolate                     11  7.9    0.001    0.001    1.277    1.278
 make_images_data                  4572 15.5    0.045    0.049    0.856    1.050
 multiply_cannon_metrocomm1       18288 15.5    0.029    0.030    0.369    0.989
 qs_energies_init_hamiltonians       11  5.9    0.001    0.003    0.985    0.988
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.934    0.957
 hybrid_alltoall_any               4725 16.4    0.055    0.114    0.746    0.944
 acc_transpose_blocks_kernels     18288 16.5    0.211    0.220    0.851    0.893
 mp_waitany                        9880 13.7    0.569    0.865    0.569    0.865
 mp_alltoall_d11v                  2130 13.8    0.774    0.860    0.774    0.860
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.853    0.856
 cp_fm_cholesky_invert               11 10.9    0.847    0.852    0.847    0.852
 rs_pw_transfer_RS2PW_140           130 11.5    0.120    0.124    0.541    0.821
 mp_alltoall_z22v                  1201 16.6    0.715    0.812    0.715    0.812
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.662    0.756
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=35.225000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=496.818182, yerr=1.402477
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             550.621184E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62659.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_comm_split          50
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.034    0.114   37.022   37.023
 qs_mol_dyn_low                       1  2.0    0.003    0.004   36.639   36.648
 qs_forces                           11  3.9    0.003    0.013   36.302   36.302
 qs_energies                         11  4.9    0.003    0.006   34.562   34.568
 scf_env_do_scf                      11  5.9    0.002    0.005   29.022   29.023
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.008   25.208   25.208
 velocity_verlet                     10  3.0    0.002    0.002   18.886   18.900
 dbcsr_multiply_generic            2286 12.5    0.100    0.103   18.536   18.642
 qs_scf_new_mos                     108  7.5    0.001    0.001   16.462   16.517
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   16.461   16.516
 ot_scf_mini                        108  9.5    0.003    0.004   15.513   15.562
 multiply_cannon                   2286 13.5    0.227    0.273   14.733   15.212
 multiply_cannon_loop              2286 14.5    0.938    0.970   13.720   14.160
 ot_mini                            108 10.5    0.001    0.001    9.463    9.526
 multiply_cannon_multrec          27432 15.5    2.366    3.039    8.883    9.239
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.643    7.693
 dbcsr_mm_accdrv_process          47916 15.9    5.482    7.200    6.424    7.688
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.755    6.807
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.755    6.806
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.015    6.059
 init_scf_run                        11  5.9    0.000    0.001    4.076    4.077
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    4.076    4.077
 sum_up_and_integrate               119 10.3    0.035    0.038    3.972    3.981
 integrate_v_rspace                 119 11.3    0.002    0.002    3.937    3.946
 init_scf_loop                       11  6.9    0.001    0.003    3.789    3.790
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.736    3.772
 calculate_rho_elec                 119  8.7    0.040    0.046    3.736    3.772
 qs_ot_get_p                        119 10.4    0.001    0.001    3.412    3.485
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.825    3.309
 prepare_preconditioner              11  7.9    0.000    0.001    2.882    2.890
 make_preconditioner                 11  8.9    0.003    0.010    2.881    2.890
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.483    2.817
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.618    2.619
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.134    2.597
 apply_single                       119 13.6    0.000    0.000    2.133    2.596
 mp_waitall_1                    137007 16.6    1.846    2.496    1.846    2.496
 rs_pw_transfer                     974 11.9    0.009    0.009    2.275    2.493
 make_m2s                          4572 13.5    0.054    0.056    2.286    2.439
 density_rs2pw                      119  9.7    0.004    0.004    2.141    2.332
 make_images                       4572 14.5    0.269    0.332    2.178    2.329
 calculate_dm_sparse                119  9.5    0.000    0.000    2.201    2.260
 pw_transfer                       1439 11.6    0.066    0.071    2.126    2.165
 jit_kernel_multiply                 10 15.8    0.882    2.101    0.882    2.101
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    2.089    2.098
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.033    2.077
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.950    1.974
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.946    1.947
 grid_integrate_task_list           119 12.3    1.827    1.914    1.827    1.914
 ot_diis_step                       108 11.5    0.012    0.012    1.779    1.779
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.765    1.766
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.734    1.747
 fft3d_ps                          1201 14.6    0.559    0.612    1.707    1.747
 potential_pw2rs                    119 12.3    0.008    0.009    1.733    1.739
 fft_wrap_pw1pw2_140                487 13.2    0.088    0.095    1.629    1.676
 mp_sum_l                          7207 12.9    1.116    1.623    1.116    1.623
 acc_transpose_blocks             27432 15.5    0.114    0.116    1.526    1.550
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.514    1.515
 cp_fm_redistribute_end              50 14.0    1.003    1.490    1.004    1.491
 cp_fm_diag_elpa_base                50 14.0    0.465    1.421    0.484    1.463
 wfi_extrapolate                     11  7.9    0.001    0.001    1.398    1.398
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.374    1.385
 grid_collocate_task_list           119  9.7    1.221    1.325    1.221    1.325
 multiply_cannon_metrocomm3       27432 15.5    0.038    0.039    0.720    1.239
 qs_energies_init_hamiltonians       11  5.9    0.008    0.029    1.160    1.180
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.127    1.146
 dbcsr_complete_redistribute        329 12.2    0.139    0.164    0.841    1.121
 cp_fm_upper_to_full                 72 13.5    0.804    1.121    0.804    1.121
 multiply_cannon_sync_h2d         27432 15.5    0.983    1.055    0.983    1.055
 make_images_data                  4572 15.5    0.045    0.050    0.883    1.040
 hybrid_alltoall_any               4725 16.4    0.062    0.151    0.757    0.966
 cp_fm_cholesky_invert               11 10.9    0.945    0.949    0.945    0.949
 acc_transpose_blocks_kernels     27432 16.5    0.268    0.278    0.893    0.914
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.609    0.880
 mp_alltoall_d11v                  2130 13.8    0.770    0.878    0.770    0.878
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.791    0.868
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.832    0.837
 mp_alltoall_z22v                  1201 16.6    0.731    0.773    0.731    0.773
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=37.023000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=522.545455, yerr=2.965365
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             598.622208E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_comm_split          50
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.034    0.077   29.956   29.957
 qs_mol_dyn_low                       1  2.0    0.003    0.003   29.567   29.576
 qs_forces                           11  3.9    0.002    0.002   29.508   29.509
 qs_energies                         11  4.9    0.001    0.002   27.761   27.764
 scf_env_do_scf                      11  5.9    0.000    0.001   22.641   22.641
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   19.929   19.930
 velocity_verlet                     10  3.0    0.002    0.002   14.994   14.997
 dbcsr_multiply_generic            2286 12.5    0.092    0.095   12.842   12.981
 qs_scf_new_mos                     108  7.5    0.001    0.001   11.688   11.714
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   11.687   11.713
 ot_scf_mini                        108  9.5    0.002    0.002   10.992   11.018
 multiply_cannon                   2286 13.5    0.230    0.237   10.149   10.528
 multiply_cannon_loop              2286 14.5    0.331    0.342    9.178    9.432
 multiply_cannon_multrec           9144 15.5    1.681    1.923    6.017    6.213
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.143    6.167
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.143    6.166
 ot_mini                            108 10.5    0.001    0.001    6.105    6.139
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.474    5.496
 qs_ot_get_derivative               108 11.5    0.001    0.001    4.789    4.815
 dbcsr_mm_accdrv_process          12550 15.8    3.228    4.088    4.235    4.320
 sum_up_and_integrate               119 10.3    0.038    0.040    3.790    3.793
 integrate_v_rspace                 119 11.3    0.002    0.003    3.752    3.756
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.678    3.707
 calculate_rho_elec                 119  8.7    0.060    0.061    3.677    3.707
 init_scf_run                        11  5.9    0.000    0.001    3.672    3.672
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.672    3.672
 qs_ot_get_p                        119 10.4    0.001    0.001    3.081    3.119
 init_scf_loop                       11  6.9    0.000    0.000    2.684    2.686
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.441    2.446
 mp_waitall_1                    115863 16.7    1.711    2.204    1.711    2.204
 pw_transfer                       1439 11.6    0.066    0.070    2.171    2.181
 density_rs2pw                      119  9.7    0.004    0.004    1.994    2.131
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.077    2.089
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    2.062    2.066
 make_m2s                          4572 13.5    0.034    0.036    1.851    2.046
 rs_pw_transfer                     974 11.9    0.008    0.008    1.847    1.982
 jit_kernel_multiply                 10 15.9    0.969    1.961    0.969    1.961
 make_images                       4572 14.5    0.266    0.299    1.761    1.954
 grid_integrate_task_list           119 12.3    1.858    1.947    1.858    1.947
 prepare_preconditioner              11  7.9    0.000    0.000    1.922    1.927
 make_preconditioner                 11  8.9    0.000    0.000    1.922    1.927
 calculate_dm_sparse                119  9.5    0.000    0.000    1.882    1.904
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.854    1.854
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.810    1.837
 fft3d_ps                          1201 14.6    0.563    0.572    1.736    1.747
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.741    1.742
 fft_wrap_pw1pw2_140                487 13.2    0.087    0.090    1.677    1.691
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.619    1.631
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.549    1.562
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.545    1.546
 potential_pw2rs                    119 12.3    0.010    0.010    1.519    1.525
 cp_fm_redistribute_end              50 14.0    0.769    1.516    0.770    1.517
 cp_fm_diag_elpa_base                50 14.0    0.699    1.439    0.744    1.496
 grid_collocate_task_list           119  9.7    1.280    1.406    1.280    1.406
 ot_diis_step                       108 11.5    0.013    0.013    1.300    1.300
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.288    1.296
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.214    1.228
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.222    1.222
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.169    1.192
 apply_single                       119 13.6    0.000    0.000    1.169    1.192
 wfi_extrapolate                     11  7.9    0.001    0.001    1.171    1.171
 hybrid_alltoall_any               4725 16.4    0.063    0.176    0.825    1.107
 make_images_data                  4572 15.5    0.039    0.042    0.859    1.098
 acc_transpose_blocks              9144 15.5    0.039    0.040    1.061    1.084
 cp_fm_cholesky_invert               11 10.9    0.987    0.990    0.987    0.990
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.867    0.919
 mp_alltoall_d11v                  2130 13.8    0.808    0.895    0.808    0.895
 acc_transpose_blocks_kernels      9144 16.5    0.117    0.121    0.818    0.836
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.792    0.795
 multiply_cannon_sync_h2d          9144 15.5    0.709    0.795    0.709    0.795
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.748    0.756
 mp_allgather_i34                  2286 14.5    0.306    0.740    0.306    0.740
 multiply_cannon_metrocomm3        9144 15.5    0.019    0.019    0.408    0.736
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    0.677    0.730
 jit_kernel_transpose                 5 15.6    0.701    0.718    0.701    0.718
 mp_alltoall_z22v                  1201 16.6    0.622    0.673    0.622    0.673
 yz_to_x                            606 15.1    0.264    0.274    0.590    0.606
 dbcsr_complete_redistribute        329 12.2    0.162    0.168    0.566    0.602
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=29.957000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=566.454545, yerr=4.335134
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             776.146944E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63729.
 MP_Allreduce        10074                    433.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.015    0.033   43.493   43.494
 qs_mol_dyn_low                       1  2.0    0.003    0.003   43.292   43.298
 qs_forces                           11  3.9    0.001    0.002   43.233   43.234
 qs_energies                         11  4.9    0.001    0.002   41.217   41.220
 scf_env_do_scf                      11  5.9    0.001    0.001   35.139   35.139
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   26.885   26.887
 velocity_verlet                     10  3.0    0.002    0.002   24.422   24.427
 dbcsr_multiply_generic            2286 12.5    0.100    0.100   18.039   18.243
 qs_scf_new_mos                     108  7.5    0.001    0.001   16.749   16.842
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   16.748   16.842
 ot_scf_mini                        108  9.5    0.002    0.002   15.637   15.734
 multiply_cannon                   2286 13.5    0.302    0.313   13.812   14.787
 multiply_cannon_loop              2286 14.5    0.343    0.347   12.498   13.512
 ot_mini                            108 10.5    0.001    0.001    9.123    9.245
 multiply_cannon_multrec           9144 15.5    3.400    4.834    8.510    8.599
 init_scf_loop                       11  6.9    0.000    0.000    8.221    8.227
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.523    7.668
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    7.522    7.668
 prepare_preconditioner              11  7.9    0.000    0.000    7.223    7.238
 make_preconditioner                 11  8.9    0.000    0.000    7.223    7.238
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.096    7.194
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.809    7.111
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.808    6.940
 dbcsr_mm_accdrv_process          12550 15.8    4.070    5.610    4.986    6.332
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.495    4.555
 calculate_rho_elec                 119  8.7    0.118    0.121    4.494    4.555
 cp_fm_upper_to_full                 72 14.2    3.169    4.519    3.169    4.519
 sum_up_and_integrate               119 10.3    0.065    0.066    4.192    4.201
 integrate_v_rspace                 119 11.3    0.003    0.003    4.126    4.135
 init_scf_run                        11  5.9    0.000    0.001    4.021    4.021
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    4.021    4.021
 qs_ot_get_p                        119 10.4    0.001    0.001    3.720    3.860
 mp_waitall_1                     94719 16.7    2.556    3.666    2.556    3.666
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.584    3.021
 pw_transfer                       1439 11.6    0.068    0.069    2.922    2.926
 dbcsr_complete_redistribute        329 12.2    0.284    0.291    2.046    2.829
 fft_wrap_pw1pw2                   1201 12.6    0.009    0.009    2.823    2.828
 make_m2s                          4572 13.5    0.038    0.038    2.498    2.695
 make_images                       4572 14.5    0.351    0.386    2.377    2.574
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.676    2.461
 fft3d_ps                          1201 14.6    0.595    0.601    2.447    2.452
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.390    2.452
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.178    2.426
 apply_single                       119 13.6    0.000    0.000    2.178    2.425
 density_rs2pw                      119  9.7    0.004    0.004    2.347    2.371
 fft_wrap_pw1pw2_140                487 13.2    0.095    0.096    2.334    2.341
 qs_ot_p2m_diag                      50 11.0    0.042    0.043    2.316    2.317
 calculate_dm_sparse                119  9.5    0.000    0.000    2.242    2.299
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.020    1.307    2.286
 mp_alltoall_i22                    627 13.8    1.397    2.203    1.397    2.203
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.409    2.190
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.094    2.095
 grid_integrate_task_list           119 12.3    2.071    2.084    2.071    2.084
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.026    2.027
 ot_diis_step                       108 11.5    0.014    0.014    1.994    1.995
 mp_sum_l                          7207 12.9    1.191    1.950    1.191    1.950
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.805    1.847
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.795    1.845
 cp_fm_cholesky_invert               11 10.9    1.775    1.779    1.775    1.779
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.770    1.771
 jit_kernel_multiply                 10 15.5    0.889    1.726    0.889    1.726
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.679    1.679
 cp_fm_diag_elpa_base                50 14.0    1.532    1.585    1.676    1.676
 rs_pw_transfer                     974 11.9    0.009    0.009    1.615    1.641
 potential_pw2rs                    119 12.3    0.014    0.014    1.572    1.576
 grid_collocate_task_list           119  9.7    1.500    1.516    1.500    1.516
 wfi_extrapolate                     11  7.9    0.001    0.001    1.498    1.499
 hybrid_alltoall_any               4725 16.4    0.087    0.148    1.217    1.488
 make_images_data                  4572 15.5    0.043    0.046    1.195    1.442
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.286    1.342
 mp_alltoall_d11v                  2130 13.8    1.207    1.277    1.207    1.277
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.190    1.218
 acc_transpose_blocks              9144 15.5    0.038    0.038    1.138    1.167
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.106    1.128
 multiply_cannon_sync_h2d          9144 15.5    1.042    1.044    1.042    1.044
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.982    1.032
 yz_to_x                            606 15.1    0.461    0.471    0.996    1.008
 qs_create_task_list                 11  7.9    0.000    0.000    0.945    0.957
 generate_qs_task_list               11  8.9    0.371    0.390    0.945    0.957
 mp_alltoall_z22v                  1201 16.6    0.898    0.945    0.898    0.945
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.927    0.940
 acc_transpose_blocks_kernels      9144 16.5    0.118    0.119    0.887    0.916
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=43.494000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=727.818182, yerr=17.335293
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             502.157312E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65372.
 MP_Allreduce         9840                    486.
 MP_Sync               100
 MP_Alltoall          1938                1379060.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_comm_split          48
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.022    0.033   85.255   85.256
 qs_mol_dyn_low                       1  2.0    0.003    0.004   84.908   84.918
 qs_forces                           11  3.9    0.002    0.003   84.833   84.834
 qs_energies                         11  4.9    0.002    0.004   81.941   81.957
 scf_env_do_scf                      11  5.9    0.001    0.001   72.686   72.689
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   66.778   66.779
 dbcsr_multiply_generic            2055 12.4    0.106    0.111   52.714   53.057
 qs_scf_new_mos                      99  7.5    0.000    0.001   48.965   49.093
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   48.965   49.092
 ot_scf_mini                         99  9.5    0.002    0.003   46.509   46.620
 multiply_cannon                   2055 13.4    0.180    0.188   42.966   44.001
 velocity_verlet                     10  3.0    0.003    0.005   43.149   43.150
 multiply_cannon_loop              2055 14.4    1.546    1.590   41.965   42.966
 ot_mini                             99 10.5    0.001    0.001   28.162   28.294
 qs_ot_get_derivative                99 11.5    0.001    0.001   21.289   21.394
 multiply_cannon_multrec          49320 15.4   12.130   12.952   17.457   18.074
 rebuild_ks_matrix                  110  8.3    0.000    0.000   14.799   14.901
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.011   14.798   14.900
 qs_ks_update_qs_env                110  7.6    0.001    0.001   13.000   13.089
 mp_waitall_1                    220248 16.4   11.622   12.727   11.622   12.727
 multiply_cannon_sync_h2d         49320 15.4    9.945   10.722    9.945   10.722
 qs_ot_get_p                        110 10.4    0.001    0.001    9.848    9.944
 multiply_cannon_metrocomm3       49320 15.4    0.079    0.084    6.971    8.278
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    7.281    7.861
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.205    7.705
 apply_single                       110 13.6    0.000    0.001    7.205    7.705
 sum_up_and_integrate               110 10.3    0.036    0.044    7.290    7.303
 integrate_v_rspace                 110 11.3    0.003    0.003    7.254    7.274
 init_scf_run                        11  5.9    0.000    0.001    7.180    7.181
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    7.180    7.180
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.883    7.022
 calculate_rho_elec                 110  8.6    0.021    0.026    6.883    7.021
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    6.869    6.927
 ot_diis_step                        99 11.5    0.005    0.006    6.588    6.589
 qs_ot_p2m_diag                      48 11.0    0.012    0.018    6.431    6.447
 mp_sum_l                          6514 12.8    5.262    6.231    5.262    6.231
 init_scf_loop                       11  6.9    0.000    0.003    5.877    5.877
 dbcsr_mm_accdrv_process          87628 16.1    2.073    2.203    5.205    5.626
 cp_dbcsr_syevd                      48 12.0    0.002    0.003    5.451    5.451
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    4.912    4.914
 cp_fm_redistribute_end              48 14.0    4.284    4.881    4.287    4.883
 cp_fm_diag_elpa_base                48 14.0    0.589    4.727    0.592    4.756
 rs_pw_transfer                     902 11.9    0.011    0.013    3.942    4.574
 density_rs2pw                      110  9.6    0.004    0.005    3.739    4.356
 wfi_extrapolate                     11  7.9    0.001    0.001    4.183    4.183
 make_m2s                          4110 13.4    0.062    0.065    4.041    4.137
 make_images                       4110 14.4    0.177    0.192    3.944    4.042
 calculate_dm_sparse                110  9.5    0.000    0.001    3.916    4.026
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.748    3.752
 multiply_cannon_metrocomm1       49320 15.4    0.061    0.065    2.703    3.706
 prepare_preconditioner              11  7.9    0.000    0.001    3.652    3.677
 make_preconditioner                 11  8.9    0.000    0.001    3.652    3.677
 pw_transfer                       1331 11.6    0.055    0.066    3.534    3.620
 fft_wrap_pw1pw2                   1111 12.6    0.007    0.008    3.446    3.535
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.429    3.491
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.399    3.440
 grid_integrate_task_list           110 12.3    3.246    3.406    3.246    3.406
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    3.248    3.296
 fft3d_ps                          1111 14.6    0.778    0.883    2.976    3.049
 fft_wrap_pw1pw2_140                451 13.1    0.169    0.189    2.868    2.958
 jit_kernel_multiply                 13 15.9    2.849    2.943    2.849    2.943
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.905    2.908
 potential_pw2rs                    110 12.3    0.006    0.007    2.867    2.897
 mp_waitany                       14300 13.8    1.766    2.400    1.766    2.400
 acc_transpose_blocks             49320 15.4    0.225    0.239    2.270    2.373
 mp_alltoall_d11v                  2046 13.8    1.985    2.357    1.985    2.357
 grid_collocate_task_list           110  9.6    2.084    2.261    2.084    2.261
 mp_sum_d                          3879 11.9    1.565    2.116    1.565    2.116
 cp_fm_cholesky_invert               11 10.9    1.962    1.966    1.962    1.966
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.941    1.960
 make_images_data                  4110 15.4    0.043    0.045    1.752    1.865
 mp_alltoall_z22v                  1111 16.6    1.513    1.795    1.513    1.795
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.735    1.759
 hybrid_alltoall_any               4261 16.3    0.081    0.477    1.519    1.735
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=85.256000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=476.909091, yerr=1.975051
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             587.243520E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65587.
 MP_Allreduce         9839                    562.
 MP_Sync               100
 MP_Alltoall          1717                3517269.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_comm_split          48
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.022    0.037   71.855   71.857
 qs_mol_dyn_low                       1  2.0    0.003    0.003   71.500   71.510
 qs_forces                           11  3.9    0.003    0.005   71.173   71.174
 qs_energies                         11  4.9    0.044    0.128   67.821   67.827
 scf_env_do_scf                      11  5.9    0.000    0.001   58.232   58.235
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   50.181   50.182
 dbcsr_multiply_generic            2055 12.4    0.114    0.119   39.189   39.400
 velocity_verlet                     10  3.0    0.001    0.002   37.275   37.282
 qs_scf_new_mos                      99  7.5    0.001    0.001   33.593   33.761
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   33.593   33.761
 multiply_cannon                   2055 13.4    0.224    0.247   32.179   33.489
 ot_scf_mini                         99  9.5    0.003    0.003   31.923   32.084
 multiply_cannon_loop              2055 14.4    0.929    0.951   30.760   31.811
 ot_mini                             99 10.5    0.001    0.001   18.894   19.057
 multiply_cannon_multrec          24660 15.4    7.636    9.448   14.646   16.352
 rebuild_ks_matrix                  110  8.3    0.000    0.001   13.927   14.041
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.014   13.927   14.041
 qs_ot_get_derivative                99 11.5    0.001    0.001   13.043   13.201
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.276   12.372
 mp_waitall_1                    176588 16.5    7.841   10.598    7.841   10.598
 multiply_cannon_sync_h2d         24660 15.4    7.028    8.158    7.028    8.158
 multiply_cannon_metrocomm3       24660 15.4    0.070    0.072    5.325    8.089
 init_scf_loop                       11  6.9    0.000    0.000    8.011    8.012
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.713    7.626
 apply_single                       110 13.6    0.000    0.001    6.713    7.626
 dbcsr_mm_accdrv_process          52282 16.1    5.242    6.317    6.851    7.290
 init_scf_run                        11  5.9    0.000    0.001    7.067    7.068
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    7.067    7.068
 sum_up_and_integrate               110 10.3    0.051    0.057    6.743    6.754
 integrate_v_rspace                 110 11.3    0.002    0.003    6.692    6.706
 qs_ot_get_p                        110 10.4    0.001    0.001    6.344    6.521
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.319    6.331
 calculate_rho_elec                 110  8.6    0.039    0.047    6.319    6.330
 prepare_preconditioner              11  7.9    0.000    0.000    5.967    5.993
 make_preconditioner                 11  8.9    0.000    0.000    5.967    5.993
 ot_diis_step                        99 11.5    0.010    0.010    5.800    5.801
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    4.872    5.739
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.535    5.700
 make_m2s                          4110 13.4    0.057    0.060    4.280    4.808
 make_images                       4110 14.4    0.395    0.441    4.171    4.695
 qs_ot_p2m_diag                      48 11.0    0.028    0.044    4.338    4.359
 density_rs2pw                      110  9.6    0.004    0.004    3.483    3.968
 pw_transfer                       1331 11.6    0.066    0.074    3.799    3.944
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.881    3.881
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.692    3.840
 rs_pw_transfer                     902 11.9    0.012    0.013    3.177    3.702
 calculate_dm_sparse                110  9.5    0.001    0.001    3.583    3.617
 wfi_extrapolate                     11  7.9    0.001    0.001    3.604    3.604
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.532    3.534
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.354    3.444
 calculate_first_density_matrix       1  7.0    0.000    0.001    3.373    3.374
 grid_integrate_task_list           110 12.3    3.142    3.323    3.142    3.323
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.271    3.273
 cp_fm_redistribute_end              48 14.0    2.442    3.237    2.445    3.239
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.149    3.212
 cp_fm_diag_elpa_base                48 14.0    0.759    3.093    0.790    3.174
 fft3d_ps                          1111 14.6    1.101    1.318    3.042    3.173
 fft_wrap_pw1pw2_140                451 13.1    0.202    0.221    3.020    3.173
 make_images_data                  4110 15.4    0.047    0.051    2.325    2.861
 cp_fm_cholesky_invert               11 10.9    2.848    2.856    2.848    2.856
 mp_sum_l                          6514 12.8    2.111    2.848    2.111    2.848
 hybrid_alltoall_any               4261 16.3    0.102    0.442    2.027    2.785
 jit_kernel_multiply                 10 16.4    1.261    2.695    1.261    2.695
 potential_pw2rs                    110 12.3    0.008    0.008    2.585    2.598
 grid_collocate_task_list           110  9.6    2.054    2.483    2.054    2.483
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    2.147    2.157
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    2.002    2.025
 mp_alltoall_d11v                  2046 13.8    1.715    1.961    1.715    1.961
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    1.877    1.878
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.818    1.832
 mp_allgather_i34                  2055 14.4    0.751    1.749    0.751    1.749
 mp_waitany                       10164 13.8    1.208    1.706    1.208    1.706
 acc_transpose_blocks             24660 15.4    0.111    0.114    1.620    1.637
 multiply_cannon_metrocomm4       22605 15.4    0.072    0.076    0.779    1.614
 mp_irecv_dv                      57340 16.2    0.655    1.501    0.655    1.501
 dbcsr_complete_redistribute        325 12.2    0.244    0.309    1.214    1.487
 cp_fm_cholesky_decompose            22 10.9    1.478    1.484    1.478    1.484
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.367    1.474
 rs_pw_transfer_RS2PW_140           121 11.5    0.208    0.220    0.950    1.461
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=71.857000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=555.181818, yerr=7.183544
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             661.110784E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65578.
 MP_Allreduce         9838                    559.
 MP_Sync               100
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_comm_split          48
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.023    0.039   63.377   63.379
 qs_mol_dyn_low                       1  2.0    0.003    0.006   62.972   62.987
 qs_forces                           11  3.9    0.007    0.043   62.898   62.903
 qs_energies                         11  4.9    0.012    0.027   59.641   59.651
 scf_env_do_scf                      11  5.9    0.001    0.001   50.444   50.445
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   41.288   41.289
 velocity_verlet                     10  3.0    0.006    0.008   33.678   33.680
 dbcsr_multiply_generic            2055 12.4    0.107    0.110   29.719   29.962
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.007   26.110
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.006   26.110
 ot_scf_mini                         99  9.5    0.002    0.003   24.775   24.884
 multiply_cannon                   2055 13.4    0.212    0.225   23.199   24.496
 multiply_cannon_loop              2055 14.4    0.616    0.638   21.929   22.917
 ot_mini                             99 10.5    0.001    0.001   14.072   14.190
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.584   12.693
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.016   12.584   12.692
 multiply_cannon_multrec          16440 15.4    4.005    5.306   10.712   11.951
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.086   11.179
 mp_waitall_1                    139946 16.5    7.100   10.480    7.100   10.480
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.592    9.704
 init_scf_loop                       11  6.9    0.001    0.010    9.110    9.111
 multiply_cannon_metrocomm3       16440 15.4    0.044    0.044    4.240    7.390
 prepare_preconditioner              11  7.9    0.000    0.001    7.283    7.298
 make_preconditioner                 11  8.9    0.000    0.002    7.283    7.298
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.630    6.979
 sum_up_and_integrate               110 10.3    0.059    0.060    6.695    6.711
 dbcsr_mm_accdrv_process          34862 16.1    5.427    6.181    6.561    6.707
 integrate_v_rspace                 110 11.3    0.002    0.003    6.635    6.651
 init_scf_run                        11  5.9    0.000    0.001    6.417    6.417
 scf_env_initial_rho_setup           11  6.9    0.001    0.003    6.416    6.417
 qs_rho_update_rho_low              110  7.6    0.001    0.002    6.077    6.085
 calculate_rho_elec                 110  8.6    0.058    0.059    6.076    6.085
 qs_ot_get_p                        110 10.4    0.001    0.001    5.833    5.956
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    5.012    5.440
 apply_single                       110 13.6    0.000    0.000    5.012    5.440
 make_m2s                          4110 13.4    0.050    0.051    4.320    4.742
 make_images                       4110 14.4    0.389    0.508    4.205    4.629
 ot_diis_step                        99 11.5    0.011    0.011    4.444    4.445
 density_rs2pw                      110  9.6    0.004    0.005    3.221    4.379
 multiply_cannon_sync_h2d         16440 15.4    3.720    4.378    3.720    4.378
 qs_ot_p2m_diag                      48 11.0    0.041    0.044    4.120    4.125
 rs_pw_transfer                     902 11.9    0.010    0.011    2.993    4.078
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.187    3.826
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.765    3.765
 pw_transfer                       1331 11.6    0.065    0.073    3.678    3.689
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.572    3.584
 grid_integrate_task_list           110 12.3    3.160    3.361    3.160    3.361
 calculate_first_density_matrix       1  7.0    0.000    0.004    3.264    3.265
 calculate_dm_sparse                110  9.5    0.001    0.001    3.165    3.191
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.176    3.178
 cp_fm_redistribute_end              48 14.0    1.978    3.140    1.980    3.140
 cp_fm_diag_elpa_base                48 14.0    1.095    2.971    1.154    3.090
 wfi_extrapolate                     11  7.9    0.001    0.001    3.058    3.058
 cp_fm_cholesky_invert               11 10.9    3.024    3.030    3.024    3.030
 fft_wrap_pw1pw2_140                451 13.1    0.212    0.215    3.015    3.029
 make_images_data                  4110 15.4    0.043    0.048    2.508    2.998
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.944    2.945
 fft3d_ps                          1111 14.6    1.086    1.097    2.872    2.882
 hybrid_alltoall_any               4261 16.3    0.105    0.372    2.201    2.877
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.599    2.635
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.420    2.481
 potential_pw2rs                    110 12.3    0.010    0.011    2.462    2.472
 grid_collocate_task_list           110  9.6    2.079    2.458    2.079    2.458
 mp_waitany                       17072 13.8    1.290    2.426    1.290    2.426
 multiply_cannon_metrocomm4       14385 15.4    0.044    0.047    0.852    2.281
 mp_sum_l                          6514 12.8    1.595    2.224    1.595    2.224
 qs_energies_init_hamiltonians       11  5.9    0.001    0.003    2.183    2.191
 mp_irecv_dv                      48980 15.7    0.783    2.158    0.783    2.158
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    2.118    2.131
 mp_alltoall_d11v                  2046 13.8    1.747    2.091    1.747    2.091
 rs_pw_transfer_RS2PW_140           121 11.5    0.175    0.179    0.957    2.030
 dbcsr_complete_redistribute        325 12.2    0.319    0.356    1.520    1.984
 jit_kernel_multiply                  8 16.4    0.748    1.933    0.748    1.933
 cp_fm_upper_to_full                 70 13.6    1.390    1.875    1.390    1.875
 cp_fm_cholesky_decompose            22 10.9    1.771    1.792    1.771    1.792
 mp_allgather_i34                  2055 14.4    0.598    1.760    0.598    1.760
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.703    1.721
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.363    1.471
 copy_fm_to_dbcsr                   174 11.2    0.001    0.002    0.988    1.442
 qs_env_update_s_mstruct             11  6.9    0.000    0.001    1.221    1.316
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.305    1.313
 acc_transpose_blocks             16440 15.4    0.071    0.074    1.270    1.288
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=63.379000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=625.090909, yerr=9.567507
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             729.980928E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65576.
 MP_Allreduce         9838                    600.
 MP_Sync               100
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_comm_split          48
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.031    0.087   66.510   66.517
 qs_mol_dyn_low                       1  2.0    0.003    0.003   66.138   66.168
 qs_forces                           11  3.9    0.002    0.002   66.066   66.068
 qs_energies                         11  4.9    0.003    0.012   62.640   62.644
 scf_env_do_scf                      11  5.9    0.001    0.001   53.882   53.885
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   41.869   41.869
 velocity_verlet                     10  3.0    0.002    0.006   37.390   37.392
 dbcsr_multiply_generic            2055 12.4    0.114    0.116   29.893   30.090
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.696   26.806
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.695   26.805
 ot_scf_mini                         99  9.5    0.003    0.004   25.068   25.169
 multiply_cannon                   2055 13.4    0.242    0.265   22.882   23.947
 multiply_cannon_loop              2055 14.4    0.888    0.903   21.454   22.032
 multiply_cannon_multrec          24660 15.4    4.233    6.974   13.093   14.388
 ot_mini                             99 10.5    0.001    0.001   14.247   14.362
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.269   12.385
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.019   12.269   12.385
 init_scf_loop                       11  6.9    0.002    0.011   11.970   11.971
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.851   10.948
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.105   10.210
 prepare_preconditioner              11  7.9    0.000    0.001   10.192   10.209
 make_preconditioner                 11  8.9    0.000    0.002   10.192   10.209
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.450    9.881
 dbcsr_mm_accdrv_process          52304 16.0    7.093    8.567    8.711    9.725
 sum_up_and_integrate               110 10.3    0.066    0.069    6.599    6.611
 integrate_v_rspace                 110 11.3    0.003    0.003    6.532    6.547
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.199    6.207
 calculate_rho_elec                 110  8.6    0.077    0.081    6.198    6.206
 mp_waitall_1                    121746 16.5    4.186    6.114    4.186    6.114
 qs_ot_get_p                        110 10.4    0.001    0.001    5.843    5.985
 init_scf_run                        11  5.9    0.000    0.001    5.930    5.931
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    5.930    5.931
 make_m2s                          4110 13.4    0.060    0.061    5.360    5.674
 make_images                       4110 14.4    0.573    0.696    5.219    5.529
 cp_fm_upper_to_full                 70 13.8    3.248    4.605    3.248    4.605
 ot_diis_step                        99 11.5    0.011    0.012    4.103    4.104
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.017    4.103
 apply_single                       110 13.6    0.000    0.000    4.017    4.102
 qs_ot_p2m_diag                      48 11.0    0.055    0.064    4.015    4.030
 pw_transfer                       1331 11.6    0.065    0.074    3.840    3.868
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.734    3.767
 dbcsr_complete_redistribute        325 12.2    0.416    0.459    2.643    3.757
 density_rs2pw                      110  9.6    0.004    0.004    3.162    3.667
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.525    3.525
 grid_integrate_task_list           110 12.3    3.261    3.474    3.261    3.474
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.378    3.429
 multiply_cannon_sync_h2d         24660 15.4    3.183    3.395    3.183    3.395
 calculate_dm_sparse                110  9.5    0.001    0.001    3.196    3.227
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    2.102    3.212
 fft_wrap_pw1pw2_140                451 13.1    0.202    0.211    3.156    3.190
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.072    3.128
 multiply_cannon_metrocomm3       24660 15.4    0.036    0.037    1.361    3.092
 rs_pw_transfer                     902 11.9    0.010    0.011    2.551    3.055
 fft3d_ps                          1111 14.6    1.091    1.130    3.024    3.054
 wfi_extrapolate                     11  7.9    0.001    0.001    2.995    2.995
 make_images_data                  4110 15.4    0.047    0.050    2.651    2.983
 cp_fm_cholesky_invert               11 10.9    2.958    2.969    2.958    2.969
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.961    2.963
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    2.924    2.925
 hybrid_alltoall_any               4261 16.3    0.120    0.456    2.255    2.925
 cp_fm_redistribute_end              48 14.0    1.477    2.923    1.479    2.924
 cp_fm_diag_elpa_base                48 14.0    1.363    2.783    1.442    2.894
 calculate_first_density_matrix       1  7.0    0.000    0.003    2.834    2.837
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.732    2.819
 mp_alltoall_i22                    605 13.7    1.613    2.779    1.613    2.779
 jit_kernel_multiply                 12 15.7    1.291    2.600    1.291    2.600
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.562    2.591
 grid_collocate_task_list           110  9.6    2.181    2.444    2.181    2.444
 potential_pw2rs                    110 12.3    0.012    0.013    2.285    2.295
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    2.289    2.291
 mp_alltoall_d11v                  2046 13.8    1.702    2.242    1.702    2.242
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.807    1.826
 cp_fm_cholesky_decompose            22 10.9    1.778    1.812    1.778    1.812
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.717    1.751
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.596    1.699
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.670    1.682
 multiply_cannon_metrocomm4       20550 15.4    0.057    0.060    0.833    1.636
 acc_transpose_blocks             24660 15.4    0.107    0.111    1.604    1.634
 mp_sum_l                          6514 12.8    1.030    1.631    1.030    1.631
 mp_waitany                       13376 13.8    1.098    1.577    1.098    1.577
 mp_irecv_dv                      62702 16.1    0.733    1.557    0.733    1.557
 mp_allgather_i34                  2055 14.4    0.523    1.500    0.523    1.500
 rs_gather_matrices                 110 12.3    0.266    0.296    0.909    1.418
 rs_pw_transfer_RS2PW_140           121 11.5    0.170    0.179    0.807    1.331
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=66.517000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=693.454545, yerr=6.919848
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             827.195392E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65574.
 MP_Allreduce         9838                    640.
 MP_Sync               100
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_comm_split          48
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.014    0.033   57.033   57.035
 qs_mol_dyn_low                       1  2.0    0.003    0.003   56.745   56.762
 qs_forces                           11  3.9    0.002    0.002   56.679   56.680
 qs_energies                         11  4.9    0.001    0.001   52.955   52.960
 scf_env_do_scf                      11  5.9    0.000    0.001   44.673   44.673
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   36.654   36.655
 velocity_verlet                     10  3.0    0.005    0.006   31.843   31.846
 dbcsr_multiply_generic            2055 12.4    0.105    0.107   23.033   23.157
 qs_scf_new_mos                      99  7.5    0.001    0.001   21.200   21.259
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   21.199   21.259
 ot_scf_mini                         99  9.5    0.002    0.002   19.957   20.005
 multiply_cannon                   2055 13.4    0.246    0.258   17.475   18.685
 multiply_cannon_loop              2055 14.4    0.321    0.333   16.127   16.343
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.203   12.266
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.013   12.203   12.266
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.843   10.899
 ot_mini                             99 10.5    0.001    0.001   10.590   10.632
 multiply_cannon_multrec           8220 15.4    3.234    4.544    7.538    8.743
 mp_waitall_1                    103326 16.6    6.192    7.978    6.192    7.978
 init_scf_loop                       11  6.9    0.000    0.000    7.968    7.969
 qs_ot_get_derivative                99 11.5    0.001    0.001    6.731    6.779
 sum_up_and_integrate               110 10.3    0.079    0.080    6.692    6.705
 integrate_v_rspace                 110 11.3    0.003    0.003    6.613    6.626
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.482    6.496
 calculate_rho_elec                 110  8.6    0.115    0.116    6.481    6.495
 prepare_preconditioner              11  7.9    0.000    0.000    6.268    6.276
 make_preconditioner                 11  8.9    0.000    0.000    6.268    6.276
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.842    5.913
 qs_ot_get_p                        110 10.4    0.001    0.001    5.411    5.471
 dbcsr_mm_accdrv_process          17442 15.9    2.794    3.920    4.172    5.157
 init_scf_run                        11  5.9    0.000    0.001    5.118    5.119
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.118    5.118
 multiply_cannon_metrocomm3        8220 15.4    0.017    0.018    3.150    4.584
 make_m2s                          4110 13.4    0.039    0.040    4.233    4.499
 make_images                       4110 14.4    0.635    0.683    4.103    4.372
 pw_transfer                       1331 11.6    0.066    0.072    4.244    4.263
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    4.137    4.158
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.974    3.978
 ot_diis_step                        99 11.5    0.012    0.012    3.834    3.835
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.738    3.798
 apply_single                       110 13.6    0.000    0.000    3.738    3.797
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.660    3.661
 density_rs2pw                      110  9.6    0.004    0.004    3.210    3.585
 fft_wrap_pw1pw2_140                451 13.1    0.215    0.218    3.560    3.585
 grid_integrate_task_list           110 12.3    3.355    3.531    3.355    3.531
 fft3d_ps                          1111 14.6    1.144    1.180    3.380    3.402
 cp_fm_cholesky_invert               11 10.9    3.118    3.123    3.118    3.123
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.098    3.099
 cp_fm_redistribute_end              48 14.0    0.794    3.060    0.801    3.061
 multiply_cannon_sync_h2d          8220 15.4    2.896    3.053    2.896    3.053
 cp_fm_diag_elpa_base                48 14.0    2.072    2.849    2.250    3.009
 make_images_data                  4110 15.4    0.039    0.044    2.413    2.840
 wfi_extrapolate                     11  7.9    0.001    0.001    2.803    2.804
 hybrid_alltoall_any               4261 16.3    0.200    0.866    2.334    2.729
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    2.704    2.704
 rs_pw_transfer                     902 11.9    0.010    0.010    2.251    2.642
 grid_collocate_task_list           110  9.6    2.277    2.540    2.277    2.540
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.507    2.508
 calculate_dm_sparse                110  9.5    0.001    0.001    2.473    2.507
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.217    2.218
 potential_pw2rs                    110 12.3    0.015    0.015    2.198    2.203
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.142    2.164
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    1.990    2.010
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.769    1.982
 mp_alltoall_d11v                  2046 13.8    1.726    1.967    1.726    1.967
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    1.850    1.876
 cp_fm_cholesky_decompose            22 10.9    1.706    1.717    1.706    1.717
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.696    1.703
 jit_kernel_multiply                  9 15.9    1.071    1.700    1.071    1.700
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.545    1.669
 mp_allgather_i34                  2055 14.4    0.538    1.642    0.538    1.642
 dbcsr_complete_redistribute        325 12.2    0.558    0.595    1.478    1.570
 mp_waitany                        9240 13.8    1.066    1.486    1.066    1.486
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.433    1.444
 qs_create_task_list                 11  7.9    0.000    0.000    1.230    1.334
 generate_qs_task_list               11  8.9    0.379    0.447    1.230    1.334
 multiply_cannon_metrocomm4        6165 15.4    0.017    0.019    0.474    1.300
 mp_irecv_dv                      24056 15.7    0.450    1.258    0.450    1.258
 rs_pw_transfer_RS2PW_140           121 11.5    0.169    0.184    0.803    1.206
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.165    1.192
 rs_gather_matrices                 110 12.3    0.324    0.364    0.970    1.181
 mp_alltoall_z22v                  1111 16.6    1.125    1.151    1.125    1.151
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=57.035000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=778.636364, yerr=11.080472
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.364816E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67104.
 MP_Allreduce         9672                    819.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.019    0.040   89.721   89.723
 qs_mol_dyn_low                       1  2.0    0.003    0.003   88.992   89.309
 qs_forces                           11  3.9    0.002    0.002   88.880   88.881
 qs_energies                         11  4.9    0.001    0.002   84.665   84.667
 scf_env_do_scf                      11  5.9    0.001    0.001   74.171   74.172
 velocity_verlet                     10  3.0    0.002    0.002   56.653   56.660
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   45.410   45.411
 dbcsr_multiply_generic            2055 12.4    0.119    0.122   28.880   28.971
 init_scf_loop                       11  6.9    0.000    0.000   28.685   28.687
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.891   26.943
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.890   26.943
 prepare_preconditioner              11  7.9    0.000    0.000   26.644   26.653
 make_preconditioner                 11  8.9    0.000    0.000   26.644   26.653
 make_full_inverse_cholesky          11  9.9    0.000    0.000   20.922   26.106
 ot_scf_mini                         99  9.5    0.002    0.002   25.137   25.176
 multiply_cannon                   2055 13.4    0.338    0.359   21.672   22.486
 multiply_cannon_loop              2055 14.4    0.344    0.377   19.839   20.112
 cp_fm_upper_to_full                 70 14.2   12.550   17.981   12.550   17.981
 rebuild_ks_matrix                  110  8.3    0.001    0.001   14.248   14.298
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.014   14.247   14.297
 ot_mini                             99 10.5    0.001    0.001   13.681   13.721
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.912   12.957
 dbcsr_complete_redistribute        325 12.2    1.022    1.046    7.348   10.529
 multiply_cannon_multrec           8220 15.4    4.369    4.555    9.858    9.988
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.294    9.486
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.204    9.243
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    5.706    8.876
 mp_alltoall_i22                    605 13.7    5.326    8.499    5.326    8.499
 mp_waitall_1                     84994 16.7    7.308    8.122    7.308    8.122
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.890    7.926
 calculate_rho_elec                 110  8.6    0.227    0.228    7.890    7.925
 sum_up_and_integrate               110 10.3    0.150    0.151    7.485    7.504
 integrate_v_rspace                 110 11.3    0.003    0.003    7.335    7.353
 qs_ot_get_p                        110 10.4    0.001    0.001    6.268    6.368
 init_scf_run                        11  5.9    0.000    0.001    6.186    6.186
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    6.185    6.186
 make_m2s                          4110 13.4    0.043    0.044    5.309    5.815
 cp_fm_cholesky_invert               11 10.9    5.713    5.718    5.713    5.718
 dbcsr_mm_accdrv_process          11614 15.7    3.261    3.688    5.346    5.628
 make_images                       4110 14.4    0.878    0.928    5.120    5.625
 pw_transfer                       1331 11.6    0.075    0.075    5.362    5.370
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.009    5.245    5.253
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.541    5.025
 apply_single                       110 13.6    0.000    0.000    4.540    5.025
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.019    4.469    4.783
 qs_ot_p2m_diag                      48 11.0    0.150    0.155    4.593    4.599
 fft_wrap_pw1pw2_140                451 13.1    0.221    0.223    4.526    4.532
 fft3d_ps                          1111 14.6    1.298    1.306    4.438    4.445
 ot_diis_step                        99 11.5    0.015    0.016    4.445    4.445
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    4.140    4.141
 density_rs2pw                      110  9.6    0.004    0.004    3.929    3.963
 multiply_cannon_sync_h2d          8220 15.4    3.947    3.951    3.947    3.951
 grid_integrate_task_list           110 12.3    3.658    3.721    3.658    3.721
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.694    3.695
 hybrid_alltoall_any               4261 16.3    0.257    0.555    2.859    3.577
 make_images_data                  4110 15.4    0.042    0.045    2.872    3.531
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.495    3.495
 cp_fm_diag_elpa_base                48 14.0    2.963    3.156    3.493    3.493
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.918    3.364
 wfi_extrapolate                     11  7.9    0.001    0.001    3.355    3.355
 calculate_dm_sparse                110  9.5    0.001    0.001    3.306    3.328
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.115    3.117
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.708    2.709
 grid_collocate_task_list           110  9.6    2.632    2.654    2.632    2.654
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.606    2.623
 potential_pw2rs                    110 12.3    0.021    0.021    2.606    2.608
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.486    2.506
 rs_pw_transfer                     902 11.9    0.011    0.011    2.422    2.438
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    2.222    2.290
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.089    2.183
 cp_fm_cholesky_decompose            22 10.9    2.135    2.151    2.135    2.151
 mp_alltoall_d11v                  2046 13.8    1.979    2.077    1.979    2.077
 jit_kernel_multiply                 10 15.3    1.886    2.048    1.886    2.048
 qs_create_task_list                 11  7.9    0.000    0.000    1.911    1.959
 generate_qs_task_list               11  8.9    0.736    0.790    1.911    1.959
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.945    1.951
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.861    1.877
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.796    1.838
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=89.723000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1206.181818, yerr=60.380337
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             627.904512E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175954870160
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4075                  56898.
 MP_Allreduce        11228                    786.
 MP_Sync               170
 MP_Alltoall          2226                2520958.
 MP_SendRecv         24320                  18752.
 MP_ISendRecv        24320                  18752.
 MP_Wait             42476
 MP_comm_split          83
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.046  206.638  206.640
 qs_mol_dyn_low                       1  2.0    0.004    0.007  206.151  206.165
 qs_forces                           11  3.9    0.003    0.004  206.058  206.061
 qs_energies                         11  4.9    0.001    0.002  200.396  200.409
 scf_env_do_scf                      11  5.9    0.001    0.001  183.730  183.734
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.009  162.442  162.444
 dbcsr_multiply_generic            2507 12.6    0.177    0.183  123.869  124.651
 velocity_verlet                     10  3.0    0.001    0.002  124.346  124.347
 qs_scf_new_mos                     117  7.6    0.001    0.001  121.836  122.049
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  121.835  122.049
 ot_scf_mini                        117  9.6    0.003    0.003  115.278  115.477
 multiply_cannon                   2507 13.6    0.236    0.249  100.922  102.661
 multiply_cannon_loop              2507 14.6    2.101    2.193   98.686  100.403
 ot_mini                            117 10.6    0.001    0.001   65.610   65.831
 multiply_cannon_multrec          60168 15.6   33.471   35.270   41.847   43.599
 qs_ot_get_derivative               117 11.6    0.001    0.001   40.638   40.836
 rebuild_ks_matrix                  128  8.3    0.001    0.001   34.037   34.291
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.017   34.036   34.290
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.605   30.829
 mp_waitall_1                    267128 16.5   27.849   30.407   27.849   30.407
 multiply_cannon_sync_h2d         60168 15.6   27.548   29.429   27.548   29.429
 qs_ot_get_p                        128 10.4    0.001    0.001   27.866   28.083
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.498   25.352
 apply_single                       128 13.6    0.001    0.001   24.498   25.352
 ot_diis_step                       117 11.6    0.007    0.008   24.676   24.677
 init_scf_loop                       11  6.9    0.000    0.001   21.210   21.211
 qs_ot_p2m_diag                      83 11.4    0.077    0.091   21.084   21.137
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   18.729   18.933
 cp_dbcsr_syevd                      83 12.4    0.004    0.005   18.612   18.614
 multiply_cannon_metrocomm3       60168 15.6    0.119    0.124   15.666   17.384
 prepare_preconditioner              11  7.9    0.000    0.000   16.584   16.638
 make_preconditioner                 11  8.9    0.000    0.000   16.584   16.638
 make_full_inverse_cholesky          11  9.9    0.000    0.000   15.851   16.041
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   15.549   15.556
 cp_fm_redistribute_end              83 14.4   12.276   15.452   12.290   15.456
 cp_fm_diag_elpa_base                83 14.4    3.116   15.055    3.151   15.174
 sum_up_and_integrate               128 10.3    0.089    0.107   14.551   14.564
 integrate_v_rspace                 128 11.3    0.003    0.004   14.461   14.478
 qs_rho_update_rho_low              128  7.7    0.001    0.001   14.043   14.167
 calculate_rho_elec                 128  8.7    0.045    0.064   14.042   14.166
 make_m2s                          5014 13.6    0.106    0.112   13.708   14.088
 make_images                       5014 14.6    0.403    0.421   13.525   13.912
 init_scf_run                        11  5.9    0.000    0.001   12.455   12.456
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   12.455   12.456
 density_rs2pw                      128  9.7    0.006    0.007    7.521   10.780
 cp_fm_cholesky_invert               11 10.9    9.666    9.674    9.666    9.674
 rs_pw_transfer                    1046 11.9    0.016    0.018    6.249    9.516
 wfi_extrapolate                     11  7.9    0.001    0.001    9.199    9.199
 mp_sum_l                          7870 13.0    7.986    9.128    7.986    9.128
 dbcsr_mm_accdrv_process         124484 16.2    3.253    3.482    7.929    8.483
 calculate_dm_sparse                128  9.5    0.001    0.001    8.361    8.464
 pw_transfer                       1547 11.6    0.076    0.101    8.052    8.305
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    8.083    8.190
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.012    7.846    8.104
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    7.765    7.894
 grid_integrate_task_list           128 12.3    7.001    7.583    7.001    7.583
 multiply_cannon_metrocomm1       60168 15.6    0.090    0.095    5.656    7.470
 make_images_data                  5014 15.6    0.068    0.073    6.585    7.438
 fft_wrap_pw1pw2_140                523 13.2    0.442    0.513    6.587    6.883
 fft3d_ps                          1291 14.7    2.104    2.861    6.638    6.835
 hybrid_alltoall_any               5200 16.5    0.291    2.259    5.755    6.830
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.690    6.698
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.872    5.993
 mp_waitany                       16020 13.9    2.671    5.934    2.671    5.934
 grid_collocate_task_list           128  9.7    4.557    5.817    4.557    5.817
 rs_pw_transfer_RS2PW_140           139 11.5    0.281    0.296    2.116    5.411
 mp_alltoall_d11v                  2415 14.1    4.023    5.274    4.023    5.274
 potential_pw2rs                    128 12.3    0.009    0.011    4.983    5.021
 cp_fm_cholesky_decompose            22 10.9    4.772    4.784    4.772    4.784
 mp_sum_d                          4455 12.2    3.594    4.407    3.594    4.407
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=206.640000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=594.727273, yerr=6.620779
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430457200640       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1962800054272       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986252263424       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992001093632       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613077360640       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239162695680       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239162695680       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.232994E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.200017E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806382528       0.0%      0.0%    100.0%
 number of processed stacks               6024768       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1129.7
 marketing flops                   145.651870E+12
 -------------------------------------------------------------------------------
 # multiplications                           2529
 max memory usage/rank             824.709120E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2427840
 MPI messages size (bytes):
  total size                         4.132587E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.702166E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               71532               2339635200
     32768 < size <=   131072              729952              56049532928
    131072 < size <=  4194304             1387568            1410045313024
   4194304 < size <= 16777216              155760            1473827979536
  16777216 < size                           68112            1190343475200
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4127                  56635.
 MP_Allreduce        11357                    942.
 MP_Sync               172
 MP_Alltoall          1983                5621114.
 MP_SendRecv         12126                  47072.
 MP_ISendRecv        12126                  47072.
 MP_Wait             26114
 MP_comm_split          84
 MP_ISend            11836                 212447.
 MP_IRecv            11836                 212447.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.021    0.035  198.678  198.679
 qs_mol_dyn_low                       1  2.0    0.003    0.004  198.230  198.245
 qs_forces                           11  3.9    0.003    0.006  197.011  197.012
 qs_energies                         11  4.9    0.001    0.002  190.191  190.204
 scf_env_do_scf                      11  5.9    0.001    0.001  170.263  170.273
 scf_env_do_scf_inner_loop          118  6.6    0.003    0.008  136.845  136.848
 velocity_verlet                     10  3.0    0.002    0.005  123.462  123.487
 dbcsr_multiply_generic            2529 12.6    0.187    0.195  101.491  102.598
 qs_scf_new_mos                     118  7.6    0.001    0.001   96.273   96.718
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   96.272   96.717
 ot_scf_mini                        118  9.6    0.004    0.004   91.441   91.973
 multiply_cannon                   2529 13.6    0.486    0.533   81.816   85.436
 multiply_cannon_loop              2529 14.6    1.268    1.308   78.409   80.866
 ot_mini                            118 10.6    0.001    0.001   50.244   50.721
 multiply_cannon_multrec          30348 15.6   22.423   26.712   34.475   40.510
 mp_waitall_1                    216598 16.6   25.122   38.211   25.122   38.211
 rebuild_ks_matrix                  129  8.3    0.001    0.001   33.679   34.082
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.019   33.678   34.081
 init_scf_loop                       11  6.9    0.000    0.000   33.324   33.325
 qs_ks_update_qs_env                129  7.6    0.001    0.001   30.341   30.704
 prepare_preconditioner              11  7.9    0.000    0.000   28.918   28.979
 make_preconditioner                 11  8.9    0.000    0.000   28.918   28.979
 qs_ot_get_derivative               118 11.6    0.001    0.002   28.176   28.705
 multiply_cannon_metrocomm3       30348 15.6    0.095    0.101   15.875   28.391
 make_full_inverse_cholesky          11  9.9    0.000    0.000   27.653   28.166
 qs_ot_get_p                        129 10.4    0.001    0.001   22.836   23.485
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   22.067   23.149
 apply_single                       129 13.6    0.001    0.001   22.067   23.149
 ot_diis_step                       118 11.6    0.014    0.015   21.893   21.895
 multiply_cannon_sync_h2d         30348 15.6   19.716   21.870   19.716   21.870
 qs_ot_p2m_diag                      84 11.4    0.189    0.218   17.968   18.008
 cp_fm_cholesky_invert               11 10.9   17.066   17.079   17.066   17.079
 cp_dbcsr_syevd                      84 12.4    0.005    0.006   16.783   16.784
 make_m2s                          5058 13.6    0.091    0.095   13.919   15.554
 make_images                       5058 14.6    1.152    1.338   13.708   15.343
 init_scf_run                        11  5.9    0.000    0.001   15.143   15.144
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   15.142   15.144
 sum_up_and_integrate               129 10.3    0.114    0.132   14.959   14.987
 integrate_v_rspace                 129 11.3    0.003    0.004   14.844   14.876
 qs_rho_update_rho_low              129  7.7    0.001    0.001   14.232   14.273
 calculate_rho_elec                 129  8.7    0.089    0.106   14.231   14.273
 cp_fm_diag_elpa                     84 13.4    0.000    0.001   13.509   13.519
 cp_fm_redistribute_end              84 14.4    7.885   13.432    7.898   13.433
 dbcsr_mm_accdrv_process          62758 16.2    6.520    8.917   11.505   13.340
 cp_fm_diag_elpa_base                84 14.4    5.289   12.903    5.515   13.270
 qs_ot_get_derivative_diag           78 12.4    0.002    0.002   11.143   11.546
 density_rs2pw                      129  9.7    0.006    0.007    7.641   10.408
 multiply_cannon_metrocomm4       27819 15.6    0.095    0.106    3.745   10.228
 make_images_data                  5058 15.6    0.065    0.072    8.256   10.145
 calculate_dm_sparse                129  9.5    0.001    0.001    9.946   10.092
 mp_irecv_dv                      70084 16.3    3.551    9.848    3.551    9.848
 hybrid_alltoall_any               5245 16.5    0.346    1.505    7.043    9.243
 pw_transfer                       1559 11.6    0.088    0.102    8.950    9.024
 fft_wrap_pw1pw2                   1301 12.7    0.010    0.011    8.722    8.799
 rs_pw_transfer                    1054 12.0    0.014    0.016    5.965    8.770
 wfi_extrapolate                     11  7.9    0.001    0.001    8.476    8.476
 fft_wrap_pw1pw2_140                527 13.2    0.475    0.539    7.595    7.692
 grid_integrate_task_list           129 12.3    7.207    7.628    7.207    7.628
 cp_fm_cholesky_decompose            22 10.9    7.049    7.125    7.049    7.125
 fft3d_ps                          1301 14.7    2.768    2.936    7.076    7.122
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    6.234    6.973
 calculate_first_density_matrix       1  7.0    0.000    0.001    6.492    6.494
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.260    6.269
 grid_collocate_task_list           129  9.7    4.737    5.899    4.737    5.899
 jit_kernel_multiply                 12 15.9    1.970    5.774    1.970    5.774
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    5.688    5.719
 mp_sum_l                          7936 13.1    3.963    5.613    3.963    5.613
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.414    5.554
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    5.387    5.455
 mp_waitany                       11836 13.9    2.433    5.262    2.433    5.262
 potential_pw2rs                    129 12.3    0.015    0.018    5.211    5.233
 mp_alltoall_d11v                  2429 14.1    3.956    4.940    3.956    4.940
 mp_allgather_i34                  2529 14.6    1.979    4.839    1.979    4.839
 rs_pw_transfer_RS2PW_140           140 11.5    0.356    0.380    2.072    4.832
 mp_sum_d                          4499 12.2    2.687    4.141    2.687    4.141
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=198.679000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=785.818182, yerr=3.128118
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410023282688       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444707676160       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796579E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.906045E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705500928       0.0%      0.0%    100.0%
 number of processed stacks               3951168       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1697.1
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank             931.856384E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1033760
 MPI messages size (bytes):
  total size                         2.695213E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.607194E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              279168              36591108096
    131072 < size <=  4194304              654272             987691483136
   4194304 < size <= 16777216               65184             925172905552
  16777216 < size                           28448             745747251200
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4079                  57277.
 MP_Allreduce        11236                    986.
 MP_Sync               168
 MP_Alltoall          1700                9383497.
 MP_SendRecv          7874                  75008.
 MP_ISendRecv         7874                  75008.
 MP_Wait             21654
 MP_comm_split          82
 MP_ISend            11660                 275234.
 MP_IRecv            11660                 275234.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.027    0.042  179.386  179.398
 qs_mol_dyn_low                       1  2.0    0.003    0.005  178.823  178.837
 qs_forces                           11  3.9    0.007    0.022  178.702  178.706
 qs_energies                         11  4.9    0.006    0.031  172.007  172.019
 scf_env_do_scf                      11  5.9    0.004    0.010  156.053  156.054
 scf_env_do_scf_inner_loop          116  6.6    0.004    0.008  120.256  120.256
 velocity_verlet                     10  3.0    0.010    0.015  114.049  114.051
 dbcsr_multiply_generic            2485 12.5    0.181    0.185   81.668   82.900
 qs_scf_new_mos                     116  7.6    0.001    0.001   81.693   82.063
 qs_scf_loop_do_ot                  116  8.6    0.001    0.002   81.692   82.062
 ot_scf_mini                        116  9.6    0.004    0.004   77.558   77.972
 multiply_cannon                   2485 13.5    0.497    0.515   61.725   65.873
 multiply_cannon_loop              2485 14.5    0.852    0.879   58.442   60.885
 ot_mini                            116 10.6    0.001    0.001   42.538   42.963
 init_scf_loop                       11  6.9    0.001    0.004   35.682   35.683
 mp_waitall_1                    169034 16.6   25.130   34.766   25.130   34.766
 prepare_preconditioner              11  7.9    0.000    0.001   31.593   31.647
 make_preconditioner                 11  8.9    0.000    0.002   31.593   31.646
 rebuild_ks_matrix                  127  8.3    0.001    0.001   31.026   31.542
 qs_ks_build_kohn_sham_matrix       127  9.3    0.016    0.019   31.026   31.542
 make_full_inverse_cholesky          11  9.9    0.000    0.000   29.222   30.653
 qs_ks_update_qs_env                127  7.6    0.001    0.001   27.920   28.392
 multiply_cannon_multrec          19880 15.5   13.431   16.553   22.292   25.370
 multiply_cannon_metrocomm3       19880 15.5    0.060    0.064   15.209   24.895
 qs_ot_get_derivative               116 11.6    0.001    0.002   22.782   23.195
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   19.938   21.146
 apply_single                       127 13.6    0.001    0.001   19.937   21.145
 qs_ot_get_p                        127 10.4    0.001    0.001   20.398   20.969
 ot_diis_step                       116 11.6    0.018    0.018   19.655   19.656
 qs_ot_p2m_diag                      82 11.4    0.261    0.268   16.156   16.164
 make_m2s                          4970 13.5    0.080    0.085   14.939   15.968
 multiply_cannon_sync_h2d         19880 15.5   14.087   15.855   14.087   15.855
 make_images                       4970 14.5    1.149    1.249   14.707   15.734
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   15.149   15.149
 sum_up_and_integrate               127 10.3    0.130    0.140   14.848   14.873
 cp_fm_cholesky_invert               11 10.9   14.844   14.853   14.844   14.853
 integrate_v_rspace                 127 11.3    0.003    0.004   14.718   14.743
 qs_rho_update_rho_low              127  7.7    0.001    0.001   14.513   14.549
 calculate_rho_elec                 127  8.7    0.131    0.146   14.512   14.548
 cp_fm_diag_elpa                     82 13.4    0.000    0.001   11.974   11.976
 cp_fm_redistribute_end              82 14.4    4.524   11.901    4.541   11.903
 cp_fm_diag_elpa_base                82 14.4    6.929   11.288    7.339   11.764
 init_scf_run                        11  5.9    0.000    0.001   10.810   10.810
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   10.809   10.810
 make_images_data                  4970 15.5    0.061    0.068    9.309   10.713
 hybrid_alltoall_any               5155 16.4    0.433    1.984    8.187    9.893
 density_rs2pw                      127  9.7    0.006    0.006    7.422    9.661
 pw_transfer                       1535 11.6    0.086    0.105    9.291    9.407
 multiply_cannon_metrocomm4       17395 15.5    0.061    0.070    3.402    9.330
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    9.068    9.189
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    8.797    9.099
 mp_irecv_dv                      49801 16.2    3.281    9.083    3.281    9.083
 dbcsr_mm_accdrv_process          41158 16.2    4.450    5.484    8.322    8.442
 fft_wrap_pw1pw2_140                519 13.2    0.474    0.519    7.988    8.114
 grid_integrate_task_list           127 12.3    7.221    7.800    7.221    7.800
 wfi_extrapolate                     11  7.9    0.001    0.001    7.537    7.537
 cp_fm_upper_to_full                104 14.5    5.843    7.537    5.843    7.537
 cp_fm_cholesky_decompose            22 10.9    7.486    7.509    7.486    7.509
 rs_pw_transfer                    1038 11.9    0.013    0.014    5.285    7.502
 fft3d_ps                          1281 14.7    2.646    2.870    7.346    7.427
 dbcsr_complete_redistribute        393 12.7    1.168    1.197    4.650    6.412
 calculate_dm_sparse                127  9.5    0.001    0.001    5.869    5.957
 grid_collocate_task_list           127  9.7    4.861    5.611    4.861    5.611
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.604    5.608
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.626    5.370
 copy_fm_to_dbcsr                   208 11.6    0.002    0.002    3.428    5.185
 mp_alltoall_d11v                  2401 14.1    4.451    5.170    4.451    5.170
 mp_allgather_i34                  2485 14.5    1.830    5.027    1.830    5.027
 potential_pw2rs                    127 12.3    0.020    0.022    4.980    5.002
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.686    4.839
 mp_waitany                       11660 13.9    2.339    4.653    2.339    4.653
 mp_sum_l                          7804 13.0    3.221    4.635    3.221    4.635
 rs_pw_transfer_RS2PW_140           138 11.5    0.330    0.353    1.907    4.129
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    2.351    4.092
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    4.018    4.045
 mp_alltoall_i22                    712 14.1    1.924    3.824    1.924    3.824
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    3.789    3.789
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.615    3.660
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=179.398000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=883.727273, yerr=10.855170
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.353788E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               5977344       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               1.138360E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1143192
 MPI messages size (bytes):
  total size                         2.023815E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.770319E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              319024              36042702848
    131072 < size <=  4194304              715736             785529176064
   4194304 < size <= 16777216               70320             665379241840
  16777216 < size                           30720             536870912000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4075                  57332.
 MP_Allreduce        11226                   1068.
 MP_Sync               170
 MP_Alltoall          1712               12503107.
 MP_SendRecv          5888                  75008.
 MP_ISendRecv         5888                  75008.
 MP_Wait             22442
 MP_comm_split          83
 MP_ISend            14952                 244818.
 MP_IRecv            14952                 244818.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.023    0.064  192.286  192.287
 qs_mol_dyn_low                       1  2.0    0.003    0.004  191.819  191.832
 qs_forces                           11  3.9    0.003    0.006  191.704  191.708
 qs_energies                         11  4.9    0.002    0.005  184.491  184.501
 scf_env_do_scf                      11  5.9    0.001    0.002  165.382  165.390
 velocity_verlet                     10  3.0    0.002    0.002  125.542  125.544
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  118.385  118.386
 dbcsr_multiply_generic            2507 12.6    0.189    0.196   81.260   81.922
 qs_scf_new_mos                     117  7.6    0.001    0.001   80.928   81.257
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   80.928   81.256
 ot_scf_mini                        117  9.6    0.003    0.005   76.470   76.766
 multiply_cannon                   2507 13.6    0.558    0.590   56.999   59.882
 multiply_cannon_loop              2507 14.6    1.184    1.215   53.163   54.857
 init_scf_loop                       11  6.9    0.001    0.004   46.872   46.874
 prepare_preconditioner              11  7.9    0.000    0.001   42.758   42.776
 make_preconditioner                 11  8.9    0.000    0.000   42.758   42.776
 ot_mini                            117 10.6    0.001    0.001   42.365   42.657
 make_full_inverse_cholesky          11  9.9    0.000    0.000   36.487   41.394
 multiply_cannon_multrec          30084 15.6   14.224   19.358   28.648   33.254
 rebuild_ks_matrix                  128  8.3    0.001    0.001   29.877   30.158
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.019   29.877   30.157
 qs_ks_update_qs_env                128  7.6    0.001    0.001   26.911   27.155
 mp_waitall_1                    147882 16.7   16.549   26.397   16.549   26.397
 qs_ot_get_derivative               117 11.6    0.001    0.002   22.738   23.042
 make_m2s                          5014 13.6    0.096    0.099   19.888   20.984
 qs_ot_get_p                        128 10.4    0.001    0.001   20.417   20.750
 make_images                       5014 14.6    1.943    2.204   19.581   20.677
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   19.051   19.527
 apply_single                       128 13.6    0.001    0.001   19.051   19.527
 ot_diis_step                       117 11.6    0.017    0.018   19.494   19.496
 cp_fm_cholesky_invert               11 10.9   16.686   16.697   16.686   16.697
 qs_ot_p2m_diag                      83 11.4    0.342    0.390   16.285   16.337
 cp_fm_upper_to_full                105 14.7   10.916   16.162   10.916   16.162
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   14.984   14.986
 sum_up_and_integrate               128 10.3    0.140    0.153   14.819   14.842
 integrate_v_rspace                 128 11.3    0.003    0.004   14.678   14.706
 qs_rho_update_rho_low              128  7.7    0.001    0.001   14.581   14.629
 calculate_rho_elec                 128  8.7    0.175    0.190   14.581   14.629
 multiply_cannon_metrocomm3       30084 15.6    0.047    0.049    6.140   14.491
 dbcsr_mm_accdrv_process          62264 16.2    8.775   10.508   13.998   14.479
 init_scf_run                        11  5.9    0.000    0.001   13.157   13.159
 scf_env_initial_rho_setup           11  6.9    0.001    0.004   13.157   13.159
 dbcsr_complete_redistribute        395 12.7    1.512    1.622    8.865   12.619
 multiply_cannon_sync_h2d         30084 15.6   11.738   12.397   11.738   12.397
 make_images_data                  5014 15.6    0.065    0.069   10.539   12.332
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   11.727   11.731
 cp_fm_redistribute_end              83 14.4    2.037   11.616    2.056   11.622
 cp_fm_diag_elpa_base                83 14.4    8.940   10.960    9.520   11.435
 copy_fm_to_dbcsr                   209 11.7    0.002    0.003    7.481   11.234
 hybrid_alltoall_any               5200 16.5    0.526    2.219    9.509   11.188
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    6.248    9.893
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.334    9.551
 pw_transfer                       1547 11.6    0.087    0.106    9.220    9.326
 mp_alltoall_i22                    716 14.1    5.479    9.213    5.479    9.213
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    8.993    9.101
 density_rs2pw                      128  9.7    0.006    0.006    7.090    8.538
 calculate_dm_sparse                128  9.5    0.001    0.001    8.421    8.528
 fft_wrap_pw1pw2_140                523 13.2    0.482    0.493    7.920    8.033
 cp_fm_cholesky_decompose            22 10.9    7.810    7.916    7.810    7.916
 grid_integrate_task_list           128 12.3    7.533    7.907    7.533    7.907
 wfi_extrapolate                     11  7.9    0.001    0.001    7.647    7.647
 fft3d_ps                          1291 14.7    2.776    2.881    7.217    7.306
 multiply_cannon_metrocomm4       25070 15.6    0.075    0.083    2.767    6.858
 mp_irecv_dv                      76098 16.2    2.622    6.599    2.622    6.599
 rs_pw_transfer                    1046 11.9    0.013    0.014    4.900    6.409
 grid_collocate_task_list           128  9.7    5.080    5.798    5.080    5.798
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.479    5.524
 mp_alltoall_d11v                  2415 14.1    4.616    5.429    4.616    5.429
 calculate_first_density_matrix       1  7.0    0.000    0.002    5.308    5.311
 potential_pw2rs                    128 12.3    0.022    0.023    4.722    4.737
 jit_kernel_multiply                 12 15.8    2.513    4.712    2.513    4.712
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    4.606    4.627
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.442    4.496
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    4.475    4.476
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.384    4.469
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    4.201    4.260
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=192.287000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1072.636364, yerr=20.825128
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022950912       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1919850381312       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444706349056       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019187724288       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019187724288       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.792282E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.819790E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705435008       0.0%      0.0%    100.0%
 number of processed stacks               1943760       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3449.7
 marketing flops                   143.503447E+12
 -------------------------------------------------------------------------------
 # multiplications                           2483
 max memory usage/rank               1.507324E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  238368
 MPI messages size (bytes):
  total size                         1.321004E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.541866E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              112608              59039023104
   4194304 < size <= 16777216              104112             545846722560
  16777216 < size                           20064             716108613552
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8846                     52.
 MP_Alltoall          9592                 804121.
 MP_ISend            39684                2106261.
 MP_IRecv            39684                2105362.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3956                  58872.
 MP_Allreduce        10887                   1178.
 MP_Sync                87
 MP_Alltoall          1700               18828162.
 MP_SendRecv          3810                 122880.
 MP_ISendRecv         3810                 122880.
 MP_Wait             16000
 MP_ISend            10600                 423612.
 MP_IRecv            10600                 423612.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.019    0.035  176.773  176.775
 qs_mol_dyn_low                       1  2.0    0.003    0.003  176.346  176.358
 qs_forces                           11  3.9    0.003    0.003  176.252  176.255
 qs_energies                         11  4.9    0.002    0.002  168.641  168.650
 scf_env_do_scf                      11  5.9    0.001    0.001  151.091  151.096
 velocity_verlet                     10  3.0    0.002    0.003  115.083  115.087
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  114.915  114.917
 qs_scf_new_mos                     116  7.6    0.001    0.001   75.946   76.041
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   75.945   76.041
 dbcsr_multiply_generic            2483 12.5    0.181    0.188   74.757   75.289
 ot_scf_mini                        116  9.6    0.004    0.004   71.549   71.682
 multiply_cannon                   2483 13.5    0.575    0.601   54.738   59.408
 multiply_cannon_loop              2483 14.5    0.441    0.456   50.138   51.248
 ot_mini                            116 10.6    0.001    0.001   39.891   40.035
 init_scf_loop                       11  6.9    0.000    0.000   36.024   36.026
 mp_waitall_1                    124584 16.7   26.504   33.984   26.504   33.984
 prepare_preconditioner              11  7.9    0.000    0.000   32.032   32.059
 make_preconditioner                 11  8.9    0.000    0.000   32.032   32.059
 rebuild_ks_matrix                  127  8.3    0.001    0.001   30.175   30.323
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.018   30.174   30.322
 make_full_inverse_cholesky          11  9.9    0.000    0.000   29.859   30.128
 qs_ks_update_qs_env                127  7.6    0.001    0.001   27.383   27.520
 multiply_cannon_multrec           9932 15.5   10.306   14.741   17.894   22.668
 multiply_cannon_metrocomm3        9932 15.5    0.023    0.024   12.764   20.272
 qs_ot_get_derivative               116 11.6    0.001    0.002   20.018   20.149
 ot_diis_step                       116 11.6    0.019    0.020   19.800   19.800
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   19.395   19.712
 apply_single                       127 13.6    0.001    0.001   19.395   19.712
 qs_ot_get_p                        127 10.4    0.001    0.001   18.729   18.855
 make_m2s                          4966 13.5    0.066    0.071   16.344   18.731
 make_images                       4966 14.5    2.243    2.587   16.040   18.424
 cp_fm_cholesky_invert               11 10.9   18.410   18.416   18.410   18.416
 qs_rho_update_rho_low              127  7.7    0.001    0.001   15.525   15.573
 calculate_rho_elec                 127  8.7    0.257    0.266   15.524   15.572
 sum_up_and_integrate               127 10.3    0.178    0.188   15.177   15.229
 integrate_v_rspace                 127 11.3    0.004    0.004   14.998   15.054
 qs_ot_p2m_diag                      83 11.4    0.495    0.500   14.904   14.920
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   13.748   13.749
 make_images_data                  4966 15.5    0.052    0.059   10.056   12.656
 hybrid_alltoall_any               5152 16.4    0.832    3.774    9.844   12.295
 multiply_cannon_sync_h2d          9932 15.5   11.419   12.213   11.419   12.213
 init_scf_run                        11  5.9    0.000    0.001   10.704   10.704
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.704   10.704
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   10.627   10.639
 cp_fm_diag_elpa_base                83 14.4   10.384   10.463   10.622   10.634
 pw_transfer                       1535 11.6    0.085    0.094   10.037   10.080
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    9.815    9.864
 fft_wrap_pw1pw2_140                519 13.2    0.493    0.514    8.639    8.695
 qs_ot_get_derivative_diag           77 12.4    0.002    0.003    8.114    8.218
 grid_integrate_task_list           127 12.3    7.708    8.055    7.708    8.055
 fft3d_ps                          1281 14.7    2.691    2.770    7.988    8.024
 density_rs2pw                      127  9.7    0.005    0.006    7.161    8.023
 cp_fm_cholesky_decompose            22 10.9    7.913    8.014    7.913    8.014
 dbcsr_mm_accdrv_process          20582 16.1    2.602    3.570    7.225    7.901
 multiply_cannon_metrocomm1        9932 15.5    0.028    0.029    4.392    7.810
 mp_allgather_i34                  2483 14.5    2.839    7.600    2.839    7.600
 wfi_extrapolate                     11  7.9    0.001    0.001    7.511    7.511
 calculate_dm_sparse                127  9.5    0.001    0.001    6.148    6.251
 mp_alltoall_d11v                  2407 14.1    5.143    6.111    5.143    6.111
 grid_collocate_task_list           127  9.7    5.351    6.001    5.351    6.001
 dbcsr_complete_redistribute        395 12.7    2.114    2.187    5.281    5.678
 multiply_cannon_metrocomm4        7449 15.5    0.024    0.027    1.859    5.616
 mp_irecv_dv                      28602 15.9    1.823    5.529    1.823    5.529
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.371    5.384
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    5.315    5.317
 rs_pw_transfer                    1038 11.9    0.012    0.013    4.219    5.130
 potential_pw2rs                    127 12.3    0.026    0.026    4.679    4.691
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.278    4.309
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.558    3.866
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.454    3.784
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    3.733    3.773
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.689    3.700
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    3.601    3.685
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=176.775000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1399.545455, yerr=50.526975
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410023282688       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444707676160       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796579E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.606413E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705500928       0.0%      0.0%    100.0%
 number of processed stacks               1947808       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3442.6
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               3.006079E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   99400
 MPI messages size (bytes):
  total size                         1.127422E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.342275E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               44768              34745614336
   4194304 < size <= 16777216               43984             376564613120
  16777216 < size                           10032             716108638608
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4003                  59127.
 MP_Allreduce        11005                   1515.
 MP_Sync                86
 MP_Alltoall          1700               36954383.
 MP_SendRecv          1778                 218624.
 MP_ISendRecv         1778                 218624.
 MP_Wait              9728
 MP_ISend             6360                1080477.
 MP_IRecv             6360                1080477.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.051    0.088  299.309  299.314
 qs_mol_dyn_low                       1  2.0    0.003    0.003  298.631  298.643
 qs_forces                           11  3.9    0.003    0.003  298.535  298.538
 qs_energies                         11  4.9    0.002    0.002  289.237  289.245
 scf_env_do_scf                      11  5.9    0.001    0.001  266.813  266.824
 velocity_verlet                     10  3.0    0.002    0.002  215.340  215.348
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  140.296  140.298
 init_scf_loop                       11  6.9    0.000    0.000  126.260  126.263
 prepare_preconditioner              11  7.9    0.000    0.000  121.216  121.246
 make_preconditioner                 11  8.9    0.000    0.000  121.216  121.246
 make_full_inverse_cholesky          11  9.9    0.000    0.000   97.273  118.382
 qs_scf_new_mos                     116  7.6    0.001    0.001   89.830   89.952
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   89.829   89.951
 ot_scf_mini                        116  9.6    0.004    0.004   85.198   85.226
 dbcsr_multiply_generic            2485 12.5    0.212    0.219   80.738   81.261
 cp_fm_upper_to_full                104 14.8   52.831   75.768   52.831   75.768
 multiply_cannon                   2485 13.5    0.702    0.744   56.664   57.256
 multiply_cannon_loop              2485 14.5    0.467    0.480   53.180   54.512
 ot_mini                            116 10.6    0.001    0.001   43.140   43.168
 dbcsr_complete_redistribute        393 12.7    3.968    4.018   29.600   42.591
 copy_fm_to_dbcsr                   208 11.6    0.002    0.002   26.208   39.132
 rebuild_ks_matrix                  127  8.3    0.001    0.001   37.983   38.023
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.018   37.982   38.022
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000   23.896   36.628
 qs_ks_update_qs_env                127  7.6    0.001    0.001   35.017   35.060
 cp_fm_cholesky_invert               11 10.9   34.905   34.912   34.905   34.912
 mp_alltoall_i22                    712 14.1   21.764   34.485   21.764   34.485
 mp_waitall_1                    102768 16.8   26.093   29.479   26.093   29.479
 qs_ot_get_p                        127 10.4    0.001    0.001   27.134   27.155
 qs_ot_get_derivative               116 11.6    0.002    0.002   23.649   23.682
 qs_ot_p2m_diag                      82 11.4    0.867    0.872   23.148   23.179
 cp_dbcsr_syevd                      82 12.4    0.005    0.006   21.414   21.416
 qs_rho_update_rho_low              127  7.7    0.001    0.001   20.562   20.578
 calculate_rho_elec                 127  8.7    0.479    0.481   20.561   20.577
 make_m2s                          4970 13.5    0.077    0.079   19.420   20.187
 sum_up_and_integrate               127 10.3    0.319    0.322   19.724   19.810
 make_images                       4970 14.5    3.701    3.816   18.944   19.715
 integrate_v_rspace                 127 11.3    0.004    0.004   19.404   19.491
 ot_diis_step                       116 11.6    0.022    0.022   19.460   19.461
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   18.653   18.854
 apply_single                       127 13.6    0.001    0.001   18.652   18.853
 multiply_cannon_metrocomm3        9940 15.5    0.024    0.024   17.071   18.192
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   18.133   18.135
 cp_fm_diag_elpa_base                82 14.4   13.844   15.409   18.129   18.130
 multiply_cannon_multrec           9940 15.5   10.382   12.178   17.986   18.065
 multiply_cannon_sync_h2d          9940 15.5   15.529   15.537   15.529   15.537
 init_scf_run                        11  5.9    0.000    0.001   12.370   12.371
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.370   12.371
 pw_transfer                       1535 11.6    0.092    0.093   12.181   12.188
 fft_wrap_pw1pw2                   1281 12.7    0.011    0.011   11.948   11.955
 hybrid_alltoall_any               5155 16.4    1.295    3.012   10.293   11.781
 make_images_data                  4970 15.5    0.061    0.066   10.235   11.679
 fft_wrap_pw1pw2_140                519 13.2    0.536    0.538   10.558   10.571
 fft3d_ps                          1281 14.7    2.719    2.727    9.998   10.006
 cp_fm_cholesky_decompose            22 10.9    9.223    9.252    9.223    9.252
 dbcsr_mm_accdrv_process          20590 16.0    3.796    5.741    7.367    9.190
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    9.124    9.150
 mp_alltoall_d11v                  2401 14.1    8.298    9.020    8.298    9.020
 wfi_extrapolate                     11  7.9    0.001    0.001    8.976    8.976
 grid_integrate_task_list           127 12.3    8.502    8.676    8.502    8.676
 density_rs2pw                      127  9.7    0.005    0.005    8.315    8.456
 qs_energies_init_hamiltonians       11  5.9    0.002    0.002    8.055    8.056
 calculate_dm_sparse                127  9.5    0.001    0.001    6.611    6.732
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.280    6.363
 grid_collocate_task_list           127  9.7    6.306    6.339    6.306    6.339
 copy_dbcsr_to_fm                   185 11.7    0.004    0.004    6.074    6.158
 rs_scatter_matrices                138  9.7    3.548    4.486    5.900    6.135
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=299.314000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2653.909091, yerr=176.639054
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.262141E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255669.
 MP_Allreduce         3059                   6274.
 MP_Sync                 4
 MP_Alltoall            54                6805335.
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.037   84.482   84.484
 qs_energies                          1  2.0    0.000    0.000   84.034   84.041
 ls_scf                               1  3.0    0.000    0.000   83.117   83.123
 dbcsr_multiply_generic             111  6.7    0.015    0.016   72.153   72.312
 multiply_cannon                    111  7.7    0.017    0.021   55.657   56.837
 multiply_cannon_loop               111  8.7    0.212    0.226   52.226   53.523
 ls_scf_main                          1  4.0    0.000    0.000   52.060   52.060
 density_matrix_trs4                  2  5.0    0.002    0.003   46.744   46.821
 ls_scf_init_scf                      1  4.0    0.000    0.000   28.105   28.107
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   26.991   27.055
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   24.855   24.871
 mp_waitall_1                     11031 10.9   21.280   24.051   21.280   24.051
 multiply_cannon_multrec           2664  9.7    8.253    8.936   15.583   17.226
 multiply_cannon_sync_h2d          2664  9.7   14.334   16.136   14.334   16.136
 make_m2s                           222  7.7    0.008    0.010   12.959   13.521
 make_images                        222  8.7    0.099    0.108   12.937   13.502
 multiply_cannon_metrocomm1        2664  9.7    0.010    0.010    8.841   11.156
 make_images_data                   222  9.7    0.004    0.005    7.541    8.085
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.226    7.974
 dbcsr_mm_accdrv_process           4760 10.4    0.512    0.609    6.944    7.903
 hybrid_alltoall_any                227 10.6    0.215    1.835    6.475    7.601
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.230    7.115    6.230    7.115
 calculate_norms                   4752  9.8    5.606    6.192    5.606    6.192
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.796    4.906
 mp_sum_l                           807  5.4    2.996    4.671    2.996    4.671
 multiply_cannon_metrocomm4        2442  9.7    0.012    0.015    2.025    3.881
 mp_irecv_dv                       6231 10.9    2.008    3.854    2.008    3.854
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.252    3.612
 arnoldi_extremal                     4  6.8    0.000    0.000    3.282    3.300
 arnoldi_normal_ev                    4  7.8    0.001    0.002    3.282    3.300
 build_subspace                      16  8.4    0.009    0.012    3.189    3.192
 make_images_sizes                  222  9.7    0.000    0.000    0.705    3.175
 mp_alltoall_i44                    222 10.7    0.705    3.175    0.705    3.175
 ls_scf_post                          1  4.0    0.000    0.000    2.952    2.958
 ls_scf_store_result                  1  5.0    0.000    0.000    2.767    2.811
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.362    2.762
 dbcsr_merge_single_wm              555 10.7    0.466    0.592    2.354    2.754
 make_images_pack                   222  9.7    2.206    2.624    2.208    2.625
 dbcsr_matrix_vector_mult           304  9.0    0.005    0.013    2.326    2.566
 dbcsr_sort_data                    658 11.4    2.148    2.501    2.148    2.501
 dbcsr_matrix_vector_mult_local     304 10.0    2.070    2.462    2.072    2.464
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.250    2.297
 buffer_matrices_ensure_size        222  8.7    1.767    2.076    1.767    2.076
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.785    1.787
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.776    1.777
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    1.776    1.777
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=84.484000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1142.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.103837E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  10339.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.026    0.048   90.729   90.730
 qs_energies                          1  2.0    0.000    0.000   90.279   90.284
 ls_scf                               1  3.0    0.000    0.000   88.965   88.970
 dbcsr_multiply_generic             111  6.7    0.015    0.016   75.019   75.367
 multiply_cannon                    111  7.7    0.028    0.036   52.896   56.724
 ls_scf_main                          1  4.0    0.000    0.000   54.753   54.757
 multiply_cannon_loop               111  8.7    0.116    0.122   49.667   53.328
 density_matrix_trs4                  2  5.0    0.002    0.003   49.022   49.220
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.685   30.686
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.501   29.579
 mp_waitall_1                      9105 10.9   20.850   29.219   20.850   29.219
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.099   27.112
 multiply_cannon_multrec           1332  9.7   13.216   16.419   22.442   26.809
 multiply_cannon_metrocomm3        1332  9.7    0.006    0.008   11.279   20.217
 make_m2s                           222  7.7    0.006    0.007   15.473   16.148
 make_images                        222  8.7    1.566    1.924   15.443   16.120
 dbcsr_mm_accdrv_process           4041 10.4    0.269    0.453    8.826   10.422
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.411    9.969    8.411    9.969
 make_images_data                   222  9.7    0.004    0.004    8.925    9.816
 hybrid_alltoall_any                227 10.6    0.518    2.425    8.346    9.282
 mp_sum_l                           807  5.4    5.357    8.392    5.357    8.392
 multiply_cannon_metrocomm4        1221  9.7    0.006    0.008    3.188    7.460
 mp_irecv_dv                       3311 11.0    3.169    7.411    3.169    7.411
 calculate_norms                   2376  9.8    6.020    6.828    6.020    6.828
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.149    6.585
 multiply_cannon_sync_h2d          1332  9.7    4.886    5.933    4.886    5.933
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.027    5.210
 arnoldi_extremal                     4  6.8    0.000    0.000    4.630    4.650
 arnoldi_normal_ev                    4  7.8    0.001    0.004    4.630    4.650
 build_subspace                      16  8.4    0.014    0.021    4.376    4.379
 ls_scf_post                          1  4.0    0.000    0.000    3.528    3.533
 dbcsr_matrix_vector_mult           304  9.0    0.009    0.021    3.140    3.373
 ls_scf_store_result                  1  5.0    0.000    0.000    3.240    3.352
 dbcsr_matrix_vector_mult_local     304 10.0    2.731    3.211    2.733    3.213
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.275    2.817
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.614    2.695
 mp_allgather_i34                   111  8.7    0.929    2.421    0.929    2.421
 make_images_pack                   222  9.7    2.018    2.396    2.021    2.398
 dbcsr_sort_data                    436 11.2    1.846    2.086    1.846    2.086
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.889    1.891
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.876    1.878
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    1.876    1.878
 dbcsr_data_new                    4174 10.1    1.610    1.849    1.610    1.849
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=90.730000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1709.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.702148E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265470.
 MP_Allreduce         3058                  11181.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.069    0.118   94.071   94.073
 qs_energies                          1  2.0    0.000    0.000   93.314   93.318
 ls_scf                               1  3.0    0.000    0.001   91.916   91.921
 dbcsr_multiply_generic             111  6.7    0.015    0.016   76.659   76.934
 ls_scf_main                          1  4.0    0.000    0.001   57.737   57.740
 multiply_cannon                    111  7.7    0.038    0.096   52.780   56.664
 multiply_cannon_loop               111  8.7    0.100    0.111   49.077   53.862
 density_matrix_trs4                  2  5.0    0.002    0.003   51.770   51.958
 mp_waitall_1                      7281 11.0   23.178   33.790   23.178   33.790
 ls_scf_init_scf                      1  4.0    0.000    0.001   30.656   30.659
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   29.460   29.527
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.072   27.084
 multiply_cannon_multrec            888  9.7   12.653   15.269   21.199   24.394
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   10.766   23.363
 make_m2s                           222  7.7    0.006    0.007   16.708   18.123
 make_images                        222  8.7    1.968    2.269   16.669   18.084
 make_images_data                   222  9.7    0.003    0.004    9.338   10.503
 hybrid_alltoall_any                227 10.6    0.623    2.872    8.912   10.468
 mp_sum_l                           807  5.4    5.631    9.639    5.631    9.639
 dbcsr_mm_accdrv_process           3754 10.4    0.263    0.434    8.046    9.268
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.672    8.834    7.672    8.834
 multiply_cannon_sync_h2d           888  9.7    6.078    7.702    6.078    7.702
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.331    7.501
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.452    7.021
 mp_irecv_dv                       2335 11.1    2.436    6.964    2.436    6.964
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.700    6.606
 arnoldi_extremal                     4  6.8    0.000    0.000    5.111    5.126
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.111    5.126
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.916    5.117
 build_subspace                      16  8.4    0.014    0.020    4.807    4.812
 calculate_norms                   1584  9.8    4.262    4.620    4.262    4.620
 mp_allgather_i34                   111  8.7    1.544    3.905    1.544    3.905
 dbcsr_matrix_vector_mult           304  9.0    0.009    0.020    3.472    3.788
 dbcsr_matrix_vector_mult_local     304 10.0    3.003    3.576    3.005    3.578
 ls_scf_post                          1  4.0    0.000    0.001    3.523    3.529
 ls_scf_store_result                  1  5.0    0.000    0.000    3.269    3.346
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.837    2.948
 dbcsr_sort_data                    325 11.1    1.897    2.160    1.897    2.160
 make_images_pack                   222  9.7    1.813    2.131    1.816    2.133
 dbcsr_data_release                9322 10.9    1.339    2.088    1.339    2.088
 make_images_sizes                  222  9.7    0.000    0.000    1.052    1.982
 mp_alltoall_i44                    222 10.7    1.052    1.981    1.052    1.981
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.923    1.925
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.905    1.907
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.002    1.905    1.907
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=94.073000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2180.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.349246E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  13371.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.084    0.142   96.039   96.048
 qs_energies                          1  2.0    0.000    0.000   95.224   95.230
 ls_scf                               1  3.0    0.000    0.001   93.580   93.585
 dbcsr_multiply_generic             111  6.7    0.016    0.018   77.343   77.614
 ls_scf_main                          1  4.0    0.000    0.003   57.994   57.995
 multiply_cannon                    111  7.7    0.061    0.179   50.876   55.778
 density_matrix_trs4                  2  5.0    0.002    0.003   51.900   52.027
 multiply_cannon_loop               111  8.7    0.114    0.126   45.743   49.091
 ls_scf_init_scf                      1  4.0    0.001    0.005   32.328   32.330
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   31.105   31.185
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   28.660   28.674
 mp_waitall_1                      6369 11.0   21.428   27.451   21.428   27.451
 multiply_cannon_multrec           1332  9.7   14.307   17.916   22.162   25.438
 make_m2s                           222  7.7    0.007    0.008   20.685   22.207
 make_images                        222  8.7    3.134    3.594   20.635   22.159
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.004    8.504   16.552
 make_images_data                   222  9.7    0.004    0.004   11.414   13.019
 hybrid_alltoall_any                227 10.6    0.801    3.821   10.607   12.669
 dbcsr_mm_accdrv_process           3641 10.4    0.228    0.412    7.487    9.004
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.114    8.584    7.114    8.584
 mp_sum_l                           807  5.4    4.255    7.968    4.255    7.968
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.252    6.358
 multiply_cannon_sync_h2d          1332  9.7    5.537    6.068    5.537    6.068
 multiply_cannon_metrocomm4        1110  9.7    0.004    0.006    2.067    5.947
 mp_irecv_dv                       3229 10.9    2.045    5.884    2.045    5.884
 arnoldi_extremal                     4  6.8    0.000    0.000    5.234    5.245
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.234    5.245
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.343    5.127
 build_subspace                      16  8.4    0.014    0.021    4.898    4.904
 mp_allgather_i34                   111  8.7    2.268    4.822    2.268    4.822
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.529    4.718
 calculate_norms                   2376  9.8    4.190    4.551    4.190    4.551
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.020    3.610    3.894
 dbcsr_matrix_vector_mult_local     304 10.0    3.164    3.669    3.166    3.671
 dbcsr_sort_data                    658 11.4    3.048    3.486    3.048    3.486
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.788    3.280
 dbcsr_merge_single_wm              555 10.7    0.532    0.669    2.780    3.272
 ls_scf_post                          1  4.0    0.000    0.001    3.258    3.265
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.010    3.064
 ls_scf_store_result                  1  5.0    0.000    0.000    2.953    3.032
 dbcsr_data_release               10477 10.7    1.582    2.412    1.582    2.412
 dbcsr_finalize                     304  7.8    0.049    0.061    1.802    1.988
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.956    1.958
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.933    1.935
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    1.933    1.935
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=96.048000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2719.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.627677E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265558.
 MP_Allreduce         3049                  15663.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.042    0.058   99.128   99.129
 qs_energies                          1  2.0    0.000    0.000   98.334   98.340
 ls_scf                               1  3.0    0.000    0.000   96.395   96.405
 dbcsr_multiply_generic             111  6.7    0.017    0.019   77.482   77.710
 ls_scf_main                          1  4.0    0.000    0.000   62.218   62.219
 multiply_cannon                    111  7.7    0.085    0.179   55.066   60.616
 density_matrix_trs4                  2  5.0    0.002    0.003   54.932   55.058
 multiply_cannon_loop               111  8.7    0.070    0.079   50.484   52.473
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.631   30.636
 mp_waitall_1                      5436 11.0   25.873   30.417   25.873   30.417
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.273   29.310
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.009   27.019
 multiply_cannon_multrec            444  9.7   14.049   16.076   21.073   22.988
 make_m2s                           222  7.7    0.004    0.005   17.321   19.897
 make_images                        222  8.7    3.712    4.410   17.260   19.838
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   11.514   16.516
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001    5.561   14.132
 make_images_data                   222  9.7    0.003    0.004    9.602   12.067
 hybrid_alltoall_any                227 10.6    0.791    3.784    9.382   11.936
 multiply_cannon_sync_h2d           444  9.7    6.502    8.133    6.502    8.133
 dbcsr_mm_accdrv_process           3003 10.4    0.187    0.341    6.732    7.882
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.417    7.533    6.417    7.533
 mp_allgather_i34                   111  8.7    2.818    6.981    2.818    6.981
 arnoldi_extremal                     4  6.8    0.000    0.000    5.946    5.952
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.946    5.952
 mp_sum_l                           807  5.4    3.146    5.571    3.146    5.571
 build_subspace                      16  8.4    0.015    0.019    5.551    5.562
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.578    4.756
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.213    4.541
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.020    4.224    4.428
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.664    4.201
 mp_irecv_dv                       1241 11.2    1.644    4.171    1.644    4.171
 dbcsr_matrix_vector_mult_local     304 10.0    3.656    4.122    3.658    4.124
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.678    3.759
 calculate_norms                    792  9.8    3.530    3.689    3.530    3.689
 ls_scf_post                          1  4.0    0.000    0.000    3.545    3.551
 ls_scf_store_result                  1  5.0    0.000    0.000    3.332    3.371
 make_images_sizes                  222  9.7    0.000    0.000    1.087    3.225
 mp_alltoall_i44                    222 10.7    1.087    3.225    1.087    3.225
 dbcsr_finalize                     304  7.8    0.062    0.077    2.198    2.263
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.198    2.199
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.166    2.167
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    2.166    2.167
 dbcsr_merge_all                    275  8.9    0.472    0.515    2.046    2.092
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=99.129000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3636.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/e64b331552e1588eec11f575b9e771b24f7bf608_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.731542E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284111.
 MP_Allreduce         3043                  21950.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.078    0.098  106.307  106.308
 qs_energies                          1  2.0    0.000    0.000  104.897  104.911
 ls_scf                               1  3.0    0.000    0.000  101.957  101.969
 dbcsr_multiply_generic             111  6.7    0.023    0.028   75.268   75.369
 ls_scf_main                          1  4.0    0.000    0.000   64.531   64.532
 density_matrix_trs4                  2  5.0    0.002    0.003   55.327   55.380
 multiply_cannon                    111  7.7    0.178    0.222   48.176   49.512
 multiply_cannon_loop               111  8.7    0.068    0.070   44.921   45.884
 ls_scf_init_scf                      1  4.0    0.000    0.000   33.813   33.814
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   32.162   32.177
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   29.356   29.362
 make_m2s                           222  7.7    0.005    0.005   23.431   24.180
 make_images                        222  8.7    4.584    4.953   23.324   24.071
 mp_waitall_1                      4527 11.1   20.292   23.818   20.292   23.818
 multiply_cannon_multrec            444  9.7   17.837   18.614   22.431   23.007
 hybrid_alltoall_any                227 10.6    1.665    3.633   12.472   15.010
 make_images_data                   222  9.7    0.003    0.003   12.676   14.818
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001    9.071    9.583
 multiply_cannon_sync_h2d           444  9.7    8.850    8.894    8.850    8.894
 arnoldi_extremal                     4  6.8    0.000    0.000    7.468    7.477
 arnoldi_normal_ev                    4  7.8    0.002    0.009    7.468    7.477
 build_subspace                      16  8.4    0.026    0.037    6.909    6.919
 dbcsr_matrix_vector_mult           304  9.0    0.016    0.032    5.474    5.622
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    5.235    5.326
 dbcsr_matrix_vector_mult_local     304 10.0    4.949    5.246    4.952    5.249
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.936    5.184
 dbcsr_mm_accdrv_process           1814 10.4    0.161    0.320    4.424    4.554
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.126    4.258    4.126    4.258
 ls_scf_post                          1  4.0    0.000    0.000    3.612    3.626
 make_images_sizes                  222  9.7    0.000    0.000    1.427    3.487
 mp_alltoall_i44                    222 10.7    1.426    3.487    1.426    3.487
 ls_scf_store_result                  1  5.0    0.000    0.000    3.367    3.380
 calculate_norms                    792  9.8    3.240    3.274    3.240    3.274
 dbcsr_finalize                     304  7.8    0.082    0.089    3.088    3.163
 dbcsr_merge_all                    275  8.9    0.889    0.913    2.873    2.939
 qs_energies_init_hamiltonians        1  3.0    0.001    0.002    2.910    2.910
 dbcsr_complete_redistribute          5  7.6    1.437    1.466    2.782    2.892
 dbcsr_data_release               12724 10.6    2.327    2.833    2.327    2.833
 mp_allgather_i34                   111  8.7    0.889    2.799    0.889    2.799
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.571    2.574
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.424    2.543
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.506    2.509
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    2.506    2.509
 dbcsr_sort_data                    325 11.1    2.446    2.502    2.446    2.502
 dbcsr_new_transposed                 4  7.5    0.242    0.250    2.302    2.320
 dbcsr_frobenius_norm                74  6.6    2.056    2.128    2.201    2.243
 dbcsr_add_d                        103  6.2    0.000    0.000    2.156    2.227
 dbcsr_add_anytype                  103  7.2    0.860    0.894    2.156    2.227
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=106.308000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=6818.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: e64b331552e1588eec11f575b9e771b24f7bf608
Summary: empty
Status: OK