=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: 1e070e574e92b071248faedeb93e25aa28b92159


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1,
#              Cray-FFTW 3.3.8.10, COSMA 2.6.6, ELPA 2023.05.001,
#              HDF5 1.14.2, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.2.2, LIBVORI 220621, LIBXSMM 1.17,
#              PLUMED 2.9.0, SIRIUS 7.4.3, SPGLIB 1.16.2
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed by default.
#        Replace or adapt the "module add" commands below if needed.
#
# Last update: 04.10.2023
#
# Bash-only setup section of this bash/make polyglot file.
# Each line of the section ends with a backslash, so make reads the whole
# section as one continued comment, while bash skips only the first "#" line
# and executes the remainder as a single logical command line. For that reason
# no blank line, no "#" comment and no line without a trailing backslash may
# be placed inside the section.
# Steps: refuse direct execution, pick the GCC version (first argument,
# default 9.3.0), load and save the module set, build the toolchain, source
# the toolchain setup file and print build instructions.
# The toolchain build runs in a subshell so that a failing "cd" can never
# leave the caller's (sourced) shell in a different directory.
# maxtasks is not set in this file; it is taken from the calling environment.
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   (cd tools/toolchain && ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j${maxtasks} --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed); \
   printf "Sourcing %s ... " "${PWD}/tools/toolchain/install/setup"; \
   source "${PWD}/tools/toolchain/install/setup"; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   echo; \
   return

# Set options
# User-tunable knobs read by the conditional sections further down.
# DO_CHECKS and USE_ACC are compared literally against "yes".
# DO_CHECKS = yes adds -D__CHECK_DIAG; USE_ACC = yes adds the CUDA-related
# defines and selects the cuda variants of the SIRIUS/SpFFT/SpLA/COSMA paths.
DO_CHECKS      := no
USE_ACC        := yes
# Each USE_<LIB> value is the version string used to build that library's
# directory name below $(INSTALL_PATH). Leaving a value empty (or commenting
# the line out, as done for QUIP) skips the corresponding section entirely.
USE_COSMA      := 2.6.6
USE_ELPA       := 2023.05.001
USE_HDF5       := 1.14.2
USE_LIBINT     := 2.6.0
USE_LIBPEXSI   := 1.2.0
USE_LIBVORI    := 220621
USE_LIBXC      := 6.2.2
USE_LIBXSMM    := 1.17
USE_PLUMED     := 2.9.0
#USE_QUIP       := 0.9.10
USE_SIRIUS     := 7.4.3
USE_SPGLIB     := 1.16.2
# Only needed for SIRIUS
LIBVDWXC_VER   := 0.4.0
SPFFT_VER      := 1.0.6
SPLA_VER       := 1.5.5
# Only needed for LIBPEXSI
SCOTCH_VER     := 6.0.0
SUPERLU_VER    := 6.1.0

# LMAX is part of the libint install directory name (…-cp2k-lmax-$(LMAX));
# MAX_CONTR is passed to the compilers as -D__MAX_CONTR=<value>.
LMAX           := 5
MAX_CONTR      := 4

# GPUVER and OFFLOAD_TARGET are not referenced again inside this arch file.
# NOTE(review): presumably they are consumed by the Makefile that includes
# this file — confirm.
GPUVER         := P100
OFFLOAD_TARGET := cuda

# Compiler drivers, offload compiler, linker and archiver command
FC             := ftn
LD             := ftn
CC             := cc
CXX            := CC
OFFLOAD_CC     := nvcc
AR             := ar -r

# Location of the toolchain installation. PWD is taken from the environment
# here (make itself does not maintain it).
INSTALL_PATH   := $(PWD)/tools/toolchain/install

# Preprocessor defines that are always active; the optional sections below
# append to this list.
DFLAGS         := -D__parallel -D__SCALAPACK -D__FFTW3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# No -march flag is given: cc, CC, and ftn already include the proper one
CFLAGS         := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g

# Additional define for builds with further checks (DO_CHECKS=yes)
ifeq ($(DO_CHECKS), yes)
   DFLAGS         += -D__CHECK_DIAG
endif

# GPU build: enable the DBCSR accelerator backend and CUDA offloading.
# __NO_OFFLOAD_PW is added as well because there is possibly no performance
# gain with PW_CUDA currently.
ifeq ($(USE_ACC), yes)
   DFLAGS         += -D__DBCSR_ACC -D__OFFLOAD_CUDA -D__NO_OFFLOAD_PW
endif

# PLUMED: links the static library only (no include path is added).
# Setting USE_GSL here switches on the GSL section further down.
ifneq ($(USE_PLUMED),)
   USE_GSL        := 2.7
   USE_PLUMED     := $(strip $(USE_PLUMED))
   PLUMED_LIB     := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   LIBS           += $(PLUMED_LIB)/libplumed.a
   DFLAGS         += -D__PLUMED2
endif

# ELPA: the install tree has one sub-directory per build target
# (elpa-<version>/<TARGET>/...). TARGET now follows USE_ACC, like the SIRIUS
# and COSMA sections do, instead of being fixed to "nvidia": a build with
# USE_ACC != yes no longer pulls in the GPU library and -D__ELPA_NVIDIA_GPU.
# NOTE(review): the "cpu" target name assumes the toolchain installs
# elpa-<version>/cpu next to elpa-<version>/nvidia — confirm.
ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
   ifeq ($(USE_ACC), yes)
      TARGET         := nvidia
   else
      TARGET         := cpu
   endif
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   DFLAGS         += -D__ELPA
   ifeq ($(TARGET), nvidia)
      DFLAGS         += -D__ELPA_NVIDIA_GPU
   endif
   LIBS           += $(ELPA_LIB)/libelpa.a
endif

# QUIP: skipped as long as USE_QUIP is empty or undefined.
# The six static libraries are appended in the original link order.
ifneq ($(USE_QUIP),)
   USE_QUIP       := $(strip $(USE_QUIP))
   QUIP_LIB       := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   QUIP_INC       := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   DFLAGS         += -D__QUIP
   CFLAGS         += -I$(QUIP_INC)
   LIBS           += $(QUIP_LIB)/libquip_core.a $(QUIP_LIB)/libatoms.a \
                     $(QUIP_LIB)/libFoX_sax.a $(QUIP_LIB)/libFoX_common.a \
                     $(QUIP_LIB)/libFoX_utils.a $(QUIP_LIB)/libFoX_fsys.a
endif

# LIBPEXSI together with the SuperLU_DIST and SCOTCH libraries it is linked
# with; SCOTCH_VER and SUPERLU_VER select their install directories.
ifneq ($(USE_LIBPEXSI),)
   USE_LIBPEXSI   := $(strip $(USE_LIBPEXSI))
   LIBPEXSI_INC   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   LIBPEXSI_LIB   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib

   SUPERLU_VER    := $(strip $(SUPERLU_VER))
   SUPERLU_INC    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
   SUPERLU_LIB    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib

   SCOTCH_VER     := $(strip $(SCOTCH_VER))
   SCOTCH_INC     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   SCOTCH_LIB     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib

   DFLAGS         += -D__LIBPEXSI
   CFLAGS         += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC)
   LIBS           += $(LIBPEXSI_LIB)/libpexsi.a \
                     $(SUPERLU_LIB)/libsuperlu_dist.a \
                     $(SCOTCH_LIB)/libptscotchparmetis.a \
                     $(SCOTCH_LIB)/libptscotch.a \
                     $(SCOTCH_LIB)/libptscotcherr.a \
                     $(SCOTCH_LIB)/libscotchmetis.a \
                     $(SCOTCH_LIB)/libscotch.a
endif

# LIBVORI: one define plus the static library, no include path
ifneq ($(USE_LIBVORI),)
   USE_LIBVORI    := $(strip $(USE_LIBVORI))
   LIBVORI_LIB    := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   LIBS           += $(LIBVORI_LIB)/libvori.a
   DFLAGS         += -D__LIBVORI
endif

# LIBXC: libxcf03.a is listed ahead of libxc.a in the link line
ifneq ($(USE_LIBXC),)
   USE_LIBXC      := $(strip $(USE_LIBXC))
   LIBXC_LIB      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   LIBXC_INC      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   DFLAGS         += -D__LIBXC
   CFLAGS         += -I$(LIBXC_INC)
   LIBS           += $(LIBXC_LIB)/libxcf03.a $(LIBXC_LIB)/libxc.a
endif

# LIBINT: the install directory name carries both the library version and
# the LMAX value (libint-v<version>-cp2k-lmax-<LMAX>)
ifneq ($(USE_LIBINT),)
   LMAX           := $(strip $(LMAX))
   USE_LIBINT     := $(strip $(USE_LIBINT))
   LIBINT_LIB     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   LIBINT_INC     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   DFLAGS         += -D__LIBINT
   CFLAGS         += -I$(LIBINT_INC)
   LIBS           += $(LIBINT_LIB)/libint2.a
endif

# SPGLIB: the static library is named libsymspg.a
ifneq ($(USE_SPGLIB),)
   USE_SPGLIB     := $(strip $(USE_SPGLIB))
   SPGLIB_LIB     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   SPGLIB_INC     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   DFLAGS         += -D__SPGLIB
   CFLAGS         += -I$(SPGLIB_INC)
   LIBS           += $(SPGLIB_LIB)/libsymspg.a
endif

# LIBXSMM: libxsmmf.a is listed ahead of libxsmm.a in the link line
ifneq ($(USE_LIBXSMM),)
   USE_LIBXSMM    := $(strip $(USE_LIBXSMM))
   LIBXSMM_LIB    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   LIBXSMM_INC    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   DFLAGS         += -D__LIBXSMM
   CFLAGS         += -I$(LIBXSMM_INC)
   LIBS           += $(LIBXSMM_LIB)/libxsmmf.a $(LIBXSMM_LIB)/libxsmm.a
endif

# SIRIUS together with libvdwxc, SpFFT and SpLA.
# With USE_ACC = yes the cuda sub-directories of SpFFT, SpLA and SIRIUS are
# used and -D__OFFLOAD_GEMM is defined; otherwise the plain directories.
ifneq ($(USE_SIRIUS),)
   USE_SIRIUS     := $(strip $(USE_SIRIUS))
   LIBVDWXC_VER   := $(strip $(LIBVDWXC_VER))
   SPFFT_VER      := $(strip $(SPFFT_VER))
   SPLA_VER       := $(strip $(SPLA_VER))

   LIBVDWXC_INC   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SPFFT_INC      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
   SPLA_INC       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla

   ifneq ($(USE_ACC), yes)
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
   else
      DFLAGS         += -D__OFFLOAD_GEMM
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib/cuda
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib/cuda
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/cuda
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib/cuda
   endif

   CFLAGS         += -I$(LIBVDWXC_INC) -I$(SPFFT_INC) -I$(SPLA_INC) -I$(SIRIUS_INC)
   DFLAGS         += -D__LIBVDWXC -D__SPFFT -D__SPLA -D__SIRIUS
   LIBS           += $(SIRIUS_LIB)/libsirius.a $(SPLA_LIB)/libspla.a \
                     $(SPFFT_LIB)/libspfft.a $(LIBVDWXC_LIB)/libvdwxc.a
endif

# HDF5: Fortran interface first, then the high-level and the core library
ifneq ($(USE_HDF5),)
   USE_HDF5       := $(strip $(USE_HDF5))
   HDF5_LIB       := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/lib
   HDF5_INC       := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/include
   DFLAGS         += -D__HDF5
   CFLAGS         += -I$(HDF5_INC)
   LIBS           += $(HDF5_LIB)/libhdf5_fortran.a $(HDF5_LIB)/libhdf5_hl.a \
                     $(HDF5_LIB)/libhdf5.a
endif

# COSMA: with USE_ACC = yes the version string gets a "-cuda" suffix, so the
# COSMA-<version>-cuda install directory is used.
ifneq ($(USE_COSMA),)
   ifeq ($(USE_ACC), yes)
      USE_COSMA      := $(strip $(USE_COSMA))-cuda
   else
      USE_COSMA      := $(strip $(USE_COSMA))
   endif
   COSMA_LIB      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   COSMA_INC      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   DFLAGS         += -D__COSMA
   CFLAGS         += -I$(COSMA_INC)
   LIBS           += $(COSMA_LIB)/libcosma_prefixed_pxgemm.a $(COSMA_LIB)/libcosma.a \
                     $(COSMA_LIB)/libcosta.a $(COSMA_LIB)/libTiled-MM.a
endif

# GSL: USE_GSL is not part of the option list at the top of the file; it is
# assigned inside the PLUMED section.
ifneq ($(USE_GSL),)
   USE_GSL        := $(strip $(USE_GSL))
   GSL_LIB        := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   GSL_INC        := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   DFLAGS         += -D__GSL
   CFLAGS         += -I$(GSL_INC)
   LIBS           += $(GSL_LIB)/libgsl.a
endif

# Hand all collected preprocessor defines to the C compiler as well
CFLAGS         += $(DFLAGS)

# Flags for the offload compiler: defines only, no include paths.
# NOTE(review): -arch sm_60 is hard-coded and not derived from GPUVER —
# presumably it has to be kept in sync with GPUVER by hand; confirm.
OFFLOAD_FLAGS  := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# C++ uses the C flags plus the language standard
CXXFLAGS       := $(CFLAGS) -std=c++11

# Fortran starts from the C flags. -fallow-argument-mismatch is added only
# when the major version reported by "gcc -dumpversion" is greater than 9.
FCFLAGS        := $(CFLAGS)
ifneq ($(filter yes,$(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes)),)
   FCFLAGS        += -fallow-argument-mismatch
endif
FCFLAGS        += -fbacktrace -ffree-form -ffree-line-length-none -fno-omit-frame-pointer -std=f2008

# The linker reuses the Fortran flags. When CUDA_HOME is set, its lib64
# directory is added both as library search path and as run-time search path.
ifeq ($(CUDA_HOME),)
   LDFLAGS        := $(FCFLAGS)
else
   CUDA_LIB       := $(CUDA_HOME)/lib64
   LDFLAGS        := $(FCFLAGS) -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA libraries followed by the generic system libraries
LIBS           += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt \
                  -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/01
 job id: 49903541
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/02
 job id: 49903542
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/03
 job id: 49903543
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/04
 job id: 49903544
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/05
 job id: 49903545
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/06
 job id: 49903546
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/07
 job id: 49903547
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/08
 job id: 49903548
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/09
 job id: 49903549
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/10
 job id: 49903550
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/11
 job id: 49903551
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/12
 job id: 49903553
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/13
 job id: 49903554
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/14
 job id: 49903555
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/15
 job id: 49903556
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/16
 job id: 49903558
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/17
 job id: 49903559
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/18
 job id: 49903560
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/19
 job id: 49903561
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/20
 job id: 49903562
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/21
 job id: 49903563
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/22
 job id: 49903564
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/23
 job id: 49903566
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/24
 job id: 49903568
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/25
 job id: 49903570
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/26
 job id: 49903571
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: 512 H2O (4 NVE MD steps on 64 nodes)
 input file: benchmarks/QS/00512_H2O/H2O-512_md.inp
 required files: []
 output file: result.log
 # nodes = 64
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/27
 job id: 49903572
 --- Point ---
 name: 601
 plot: h2o_512_md
 regex: CP2K  
 label: (64n/12r/1t)
 --- Point ---
 name: 602
 plot: h2o_512_md_mem
 regex: Estimated peak process memory 
 label: (64n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md_mem", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          424                      8.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.020    0.045  134.649  134.649
 farming_run                          1  2.0  134.142  134.143  134.608  134.611
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.491112E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              230                1134128.
 MP_Allreduce          571                1938539.
 MP_Sync                25
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split          10
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.006    0.023  115.636  115.637
 qs_energies                          1  2.0    0.000    0.000  115.443  115.446
 mp2_main                             1  3.0    0.000    0.000  113.181  113.184
 mp2_gpw_main                         1  4.0    0.027    0.033  112.095  112.099
 mp2_ri_gpw_compute_in                1  5.0    0.172    0.174   92.963   93.405
 mp2_ri_gpw_compute_in_loop           1  6.0    0.004    0.004   54.745   55.188
 mp2_eri_3c_integrate_gpw           272  7.0    0.150    0.163   41.152   46.295
 get_2c_integrals                     1  6.0    0.008    0.009   37.439   38.046
 integrate_v_rspace                 273  8.0    0.441    0.456   24.676   29.622
 pw_transfer                       6555 10.6    0.377    0.410   27.094   27.551
 fft_wrap_pw1pw2                   5465 11.4    0.044    0.048   25.651   26.010
 grid_integrate_task_list           273  9.0   20.552   25.985   20.552   25.985
 fft_wrap_pw1pw2_100               2178 12.4    1.202    1.410   23.213   23.557
 compute_2c_integrals                 1  7.0    0.002    0.002   19.591   19.592
 compute_2c_integrals_loop_lm         1  8.0    0.002    0.004   18.846   19.323
 mp2_eri_2c_integrate_gpw             1  9.0    2.341    2.412   18.844   19.323
 rpa_ri_compute_en                    1  5.0    0.042    0.047   19.029   19.262
 cp_fm_cholesky_decompose            12  8.2   17.834   18.457   17.834   18.457
 cholesky_decomp                      1  7.0    0.000    0.000   16.689   17.316
 fft3d_s                           5443 13.4   16.238   16.515   16.260   16.537
 ao_to_mo_and_store_B_mult_1        272  7.0   10.764   15.312   10.764   15.312
 calculate_wavefunction             272  8.0    5.411    5.560   12.369   13.021
 rpa_num_int                          1  6.0    0.000    0.001   10.728   10.729
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.691   10.719
 calc_mat_Q                           8  8.0    0.000    0.000    9.498    9.602
 contract_S_to_Q                      8  9.0    0.000    0.000    8.920    9.024
 calc_potential_gpw                 544  9.5    0.005    0.006    8.321    8.821
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.503    8.616
 parallel_gemm_fm_cosma              14 10.1    8.503    8.615    8.503    8.615
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.001    8.237    8.545
 potential_pw2rs                    545 10.0    0.107    0.108    7.604    8.218
 create_integ_mat                     1  6.0    0.022    0.028    7.738    7.739
 collocate_single_gaussian          272 10.0    0.039    0.042    7.460    7.720
 array2fm                             1  7.0    0.000    0.000    6.658    7.174
 pw_scatter_s                      2720 13.7    4.323    4.486    4.323    4.486
 pw_gather_s                       2722 13.2    3.485    3.839    3.485    3.839
 array2fm_buffer_send                 1  8.0    2.912    3.060    2.912    3.060
 pw_poisson_solve                   545 10.5    1.120    1.189    2.270    2.524
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=112.098471, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2807.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          424                      9.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.023    0.037  420.982  420.983
 farming_run                          1  2.0  420.234  420.246  420.940  420.943
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827141120       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788822       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.244889E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              757                 478553.
 MP_Allreduce         2021                  21391.
 MP_Sync                37
 MP_Alltoall            77
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.031  216.957  216.958
 qs_energies                          1  2.0    0.000    0.000  216.732  216.744
 scf_env_do_scf                       1  3.0    0.000    0.000  114.849  114.849
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  113.919  113.926
 rebuild_ks_matrix                    4  6.0    0.000    0.000  113.918  113.925
 qs_ks_build_kohn_sham_matrix         4  7.0    0.053    0.061  113.918  113.925
 hfx_ks_matrix                        4  8.0    0.001    0.001  113.576  113.581
 integrate_four_center                4  9.0    0.152    0.464  113.575  113.580
 integrate_four_center_main           4 10.0    0.117    0.666  101.898  105.035
 integrate_four_center_bin          263 11.0  101.781  104.622  101.781  104.622
 mp2_main                             1  3.0    0.000    0.000  101.583  101.595
 mp2_gpw_main                         1  4.0    0.054    0.091  100.743  100.756
 init_scf_loop                        1  4.0    0.000    0.000   96.609   96.609
 mp2_ri_gpw_compute_in                1  5.0    0.069    0.086   73.891   74.908
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.002   53.782   54.791
 mp2_eri_3c_integrate_gpw            91  7.0    0.143    0.160   41.179   46.504
 integrate_v_rspace                  95  8.0    0.398    0.569   27.645   32.819
 pw_transfer                       2240 10.6    0.143    0.175   29.541   29.925
 fft_wrap_pw1pw2                   1868 11.4    0.017    0.022   28.539   28.972
 grid_integrate_task_list            95  9.0   23.064   28.441   23.064   28.441
 mp2_ri_gpw_compute_en                1  5.0    0.061    0.075   26.699   28.346
 fft_wrap_pw1pw2_100                730 12.4    1.315    1.470   26.288   26.696
 ao_to_mo_and_store_B_mult_1         91  7.0   10.926   26.180   10.926   26.180
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.838    1.893   25.014   25.024
 get_2c_integrals                     1  6.0    0.000    0.000   20.012   20.051
 compute_2c_integrals                 1  7.0    0.002    0.003   19.004   19.018
 fft3d_s                           1823 13.4   18.494   18.981   18.508   18.995
 compute_2c_integrals_loop_lm         1  8.0    0.001    0.001   18.702   18.883
 mp2_eri_2c_integrate_gpw             1  9.0    1.730    1.885   18.701   18.883
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.000   18.239   18.239
 calculate_wavefunction              91  8.0    2.014    2.047    9.622    9.852
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.558    0.590    8.853    9.392
 potential_pw2rs                    186 10.0    0.033    0.035    8.434    9.048
 local_gemm                         172  8.0    8.296    8.803    8.296    8.803
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.000    0.001    8.198    8.549
 mp2_ri_gpw_compute_en_comm          22  7.0    0.501    0.519    7.938    8.474
 calc_potential_gpw                 182  9.5    0.002    0.002    7.935    8.117
 collocate_single_gaussian           91 10.0    0.017    0.023    7.790    7.957
 mp_sync                             37 10.5    3.679    6.897    3.679    6.897
 integrate_four_center_load           4 10.0    0.000    0.000    6.738    6.742
 hfx_load_balance                     1 11.0    0.000    0.000    6.738    6.742
 mp_sendrecv_dm3                   2068  8.0    5.965    6.487    5.965    6.487
 mp2_ri_gpw_compute_en_ener         172  7.0    6.340    6.427    6.340    6.427
 pw_gather_s                        912 13.2    4.464    5.121    4.464    5.121
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=100.750076, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1502.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             452.104192E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62379.
 MP_Allreduce        10329                    270.
 MP_Sync               530
 MP_Alltoall          2083                 592243.
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.010    0.030   55.407   55.410
 qs_mol_dyn_low                       1  2.0    0.003    0.004   55.199   55.208
 qs_forces                           11  3.9    0.002    0.002   55.106   55.107
 qs_energies                         11  4.9    0.003    0.003   53.597   53.610
 scf_env_do_scf                      11  5.9    0.000    0.001   47.601   47.601
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.009   45.385   45.385
 qs_scf_new_mos                     108  7.5    0.000    0.001   34.950   35.236
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   34.949   35.236
 dbcsr_multiply_generic            2286 12.5    0.093    0.097   34.216   34.736
 ot_scf_mini                        108  9.5    0.002    0.002   33.237   33.426
 velocity_verlet                     10  3.0    0.002    0.002   28.339   28.340
 multiply_cannon                   2286 13.5    0.197    0.209   26.557   28.062
 multiply_cannon_loop              2286 14.5    1.829    1.942   25.831   27.348
 ot_mini                            108 10.5    0.001    0.001   20.098   20.353
 qs_ot_get_derivative               108 11.5    0.001    0.001   16.960   17.153
 mp_waitall_1                    245248 16.5    8.907   14.933    8.907   14.933
 multiply_cannon_metrocomm3       54864 15.5    0.073    0.079    6.271   13.142
 multiply_cannon_multrec          54864 15.5    3.624    5.673    7.654   11.106
 qs_ot_get_p                        119 10.4    0.001    0.001    8.341    8.612
 rebuild_ks_matrix                  119  8.3    0.000    0.000    8.068    8.208
 qs_ks_build_kohn_sham_matrix       119  9.3    0.010    0.012    8.068    8.208
 mp_sum_l                          7287 12.8    5.482    7.318    5.482    7.318
 qs_ks_update_qs_env                119  7.6    0.001    0.001    7.124    7.248
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    6.485    6.904
 multiply_cannon_sync_h2d         54864 15.5    5.089    6.764    5.089    6.764
 dbcsr_mm_accdrv_process          76910 16.1    1.822    2.888    3.942    5.658
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    5.315    5.352
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    4.957    5.071
 init_scf_run                        11  5.9    0.000    0.001    4.697    4.697
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    4.696    4.697
 sum_up_and_integrate               119 10.3    0.001    0.002    4.630    4.637
 integrate_v_rspace                 119 11.3    0.002    0.002    4.619    4.628
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    4.573    4.573
 qs_rho_update_rho_low              119  7.7    0.000    0.001    4.384    4.531
 calculate_rho_elec                 119  8.7    0.011    0.016    4.383    4.531
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    4.333    4.334
 cp_fm_redistribute_end              50 14.0    2.210    4.300    2.219    4.304
 cp_fm_diag_elpa_base                50 14.0    2.078    4.184    2.082    4.195
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.990    3.196
 apply_single                       119 13.6    0.000    0.000    2.990    3.196
 calculate_dm_sparse                119  9.5    0.000    0.000    2.969    3.115
 multiply_cannon_metrocomm1       54864 15.5    0.056    0.062    1.848    2.991
 ot_diis_step                       108 11.5    0.006    0.006    2.832    2.832
 acc_transpose_blocks             54864 15.5    0.229    0.258    2.215    2.750
 jit_kernel_multiply                 13 15.8    2.058    2.713    2.058    2.713
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.544    2.589
 density_rs2pw                      119  9.7    0.004    0.004    2.351    2.453
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.419    2.424
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.215    2.217
 wfi_extrapolate                     11  7.9    0.001    0.001    2.205    2.205
 init_scf_loop                       11  6.9    0.000    0.000    2.193    2.193
 grid_integrate_task_list           119 12.3    2.020    2.122    2.020    2.122
 mp_sum_d                          4135 12.0    1.482    2.089    1.482    2.089
 potential_pw2rs                    119 12.3    0.004    0.004    1.974    1.988
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.912    1.968
 make_m2s                          4572 13.5    0.053    0.055    1.865    1.923
 pw_transfer                       1439 11.6    0.052    0.057    1.829    1.907
 make_images                       4572 14.5    0.134    0.140    1.784    1.840
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.007    1.754    1.832
 mp_alltoall_d11v                  2130 13.8    1.475    1.686    1.475    1.686
 fft3d_ps                          1201 14.6    0.370    0.478    1.470    1.550
 transfer_rs2pw                     487 10.6    0.005    0.006    1.472    1.545
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.427    1.460
 acc_transpose_blocks_sync       164592 16.5    1.206    1.449    1.206    1.449
 transfer_pw2rs                     487 13.2    0.006    0.007    1.433    1.446
 fft_wrap_pw1pw2_140                487 13.2    0.140    0.152    1.350    1.428
 mp_waitany                       12084 13.8    1.257    1.421    1.257    1.421
 grid_collocate_task_list           119  9.7    1.352    1.400    1.352    1.400
 dbcsr_dot_sd                      1205 11.9    0.049    0.059    0.839    1.229
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=55.410000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=431.181818, yerr=1.113404
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             488.398848E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10306                    303.
 MP_Sync                54
 MP_Alltoall          2060                 219243.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.028   40.192   40.193
 qs_mol_dyn_low                       1  2.0    0.004    0.005   39.955   39.964
 qs_forces                           11  3.9    0.008    0.045   39.824   39.857
 qs_energies                         11  4.9    0.004    0.016   38.130   38.141
 scf_env_do_scf                      11  5.9    0.001    0.003   32.745   32.746
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   30.187   30.187
 dbcsr_multiply_generic            2286 12.5    0.101    0.104   22.749   23.135
 qs_scf_new_mos                     108  7.5    0.001    0.001   21.325   21.555
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   21.324   21.554
 ot_scf_mini                        108  9.5    0.003    0.003   20.401   20.563
 velocity_verlet                     10  3.0    0.003    0.013   18.900   18.902
 multiply_cannon                   2286 13.5    0.213    0.219   17.351   18.855
 multiply_cannon_loop              2286 14.5    1.200    1.262   16.126   17.850
 ot_mini                            108 10.5    0.001    0.001   12.560   12.791
 mp_waitall_1                    200699 16.5    5.770   11.006    5.770   11.006
 qs_ot_get_derivative               108 11.5    0.001    0.002   10.155   10.318
 multiply_cannon_metrocomm3       27432 15.5    0.071    0.074    4.134    9.683
 multiply_cannon_multrec          27432 15.5    1.845    4.083    6.309    9.264
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.919    7.070
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.017    6.918    7.070
 dbcsr_mm_accdrv_process          47894 16.0    3.608    5.853    4.382    6.506
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.102    6.241
 qs_ot_get_p                        119 10.4    0.001    0.001    4.858    5.078
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.831    4.728
 mp_sum_l                          7287 12.8    2.206    4.198    2.206    4.198
 init_scf_run                        11  5.9    0.000    0.001    4.131    4.131
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    4.130    4.131
 sum_up_and_integrate               119 10.3    0.001    0.002    3.810    3.815
 integrate_v_rspace                 119 11.3    0.002    0.003    3.796    3.801
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.743    3.770
 calculate_rho_elec                 119  8.7    0.021    0.027    3.743    3.769
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.079    3.734
 apply_single                       119 13.6    0.000    0.000    3.079    3.734
 qs_ot_p2m_diag                      50 11.0    0.009    0.013    3.156    3.176
 make_m2s                          4572 13.5    0.052    0.053    2.739    3.064
 make_images                       4572 14.5    0.206    0.246    2.652    2.979
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.749    2.749
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.606    2.607
 init_scf_loop                       11  6.9    0.001    0.004    2.530    2.531
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.412    2.500
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.397    2.398
 cp_fm_redistribute_end              50 14.0    1.213    2.368    1.218    2.371
 ot_diis_step                       108 11.5    0.011    0.014    2.351    2.352
 cp_fm_diag_elpa_base                50 14.0    1.120    2.275    1.149    2.312
 calculate_dm_sparse                119  9.5    0.000    0.001    2.170    2.247
 multiply_cannon_sync_h2d         27432 15.5    1.694    2.240    1.694    2.240
 acc_transpose_blocks             27432 15.5    0.115    0.120    1.734    2.115
 density_rs2pw                      119  9.7    0.004    0.004    2.034    2.106
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.068    2.070
 pw_transfer                       1439 11.6    0.065    0.069    1.921    1.961
 grid_integrate_task_list           119 12.3    1.843    1.947    1.843    1.947
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.869    1.907
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.830    1.871
 jit_kernel_multiply                  9 16.4    0.716    1.767    0.716    1.767
 make_images_data                  4572 15.5    0.047    0.054    1.276    1.653
 prepare_preconditioner              11  7.9    0.000    0.000    1.609    1.635
 make_preconditioner                 11  8.9    0.000    0.002    1.609    1.635
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.508    1.568
 potential_pw2rs                    119 12.3    0.006    0.006    1.544    1.555
 hybrid_alltoall_any               4725 16.4    0.054    0.114    1.145    1.517
 fft3d_ps                          1201 14.6    0.520    0.574    1.465    1.502
 wfi_extrapolate                     11  7.9    0.001    0.002    1.462    1.462
 fft_wrap_pw1pw2_140                487 13.2    0.160    0.168    1.419    1.458
 mp_alltoall_d11v                  2130 13.8    1.270    1.420    1.270    1.420
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.360    1.368
 grid_collocate_task_list           119  9.7    1.287    1.343    1.287    1.343
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.241    1.287
 transfer_rs2pw                     487 10.6    0.005    0.005    1.188    1.271
 mp_sum_d                          4135 12.0    0.603    1.089    0.603    1.089
 mp_allgather_i34                  2286 14.5    0.643    1.053    0.643    1.053
 acc_transpose_blocks_kernels     27432 16.5    0.187    0.278    0.769    1.009
 transfer_pw2rs                     487 13.2    0.004    0.005    0.973    0.982
 qs_energies_init_hamiltonians       11  5.9    0.000    0.002    0.968    0.968
 acc_transpose_blocks_sync        82296 16.5    0.824    0.957    0.824    0.957
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.876    0.888
 make_images_sizes                 4572 15.5    0.005    0.005    0.594    0.843
 mp_alltoall_i44                   4572 16.5    0.589    0.838    0.589    0.838
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=40.193000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=464.909091, yerr=1.311110
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             520.617984E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63490.
 MP_Allreduce        10155                    305.
 MP_Sync                54
 MP_Alltoall          1821                1607811.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.009    0.027   33.432   33.433
 qs_mol_dyn_low                       1  2.0    0.006    0.007   33.117   33.127
 qs_forces                           11  3.9    0.007    0.009   33.055   33.058
 qs_energies                         11  4.9    0.003    0.007   31.478   31.487
 scf_env_do_scf                      11  5.9    0.001    0.001   26.585   26.585
 scf_env_do_scf_inner_loop          108  6.5    0.010    0.068   24.095   24.096
 dbcsr_multiply_generic            2286 12.5    0.094    0.098   17.403   17.492
 qs_scf_new_mos                     108  7.5    0.001    0.001   16.054   16.073
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   16.053   16.072
 velocity_verlet                     10  3.0    0.001    0.002   15.601   15.603
 ot_scf_mini                        108  9.5    0.002    0.003   15.288   15.306
 multiply_cannon                   2286 13.5    0.195    0.198   13.919   14.652
 multiply_cannon_loop              2286 14.5    0.858    0.893   13.095   13.885
 ot_mini                            108 10.5    0.001    0.001    9.414    9.432
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.903    7.919
 multiply_cannon_multrec          18288 15.5    1.917    3.028    7.029    7.381
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.149    6.165
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.149    6.164
 dbcsr_mm_accdrv_process          38222 16.0    4.938    5.880    5.017    5.945
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.424    5.437
 mp_waitall_1                    158411 16.6    2.869    4.062    2.869    4.062
 sum_up_and_integrate               119 10.3    0.001    0.001    3.702    3.707
 integrate_v_rspace                 119 11.3    0.003    0.003    3.689    3.696
 init_scf_run                        11  5.9    0.001    0.008    3.647    3.647
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.645    3.647
 qs_ot_get_p                        119 10.4    0.001    0.001    3.594    3.619
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.936    3.519
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.486    3.497
 calculate_rho_elec                 119  8.7    0.031    0.031    3.485    3.497
 multiply_cannon_metrocomm3       18288 15.5    0.047    0.049    1.484    2.539
 init_scf_loop                       11  6.9    0.000    0.002    2.469    2.470
 qs_ot_p2m_diag                      50 11.0    0.012    0.013    2.406    2.416
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.004    2.376
 apply_single                       119 13.6    0.000    0.000    2.004    2.376
 calculate_first_density_matrix       1  7.0    0.000    0.003    2.364    2.366
 make_m2s                          4572 13.5    0.043    0.044    1.985    2.146
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.097    2.098
 make_images                       4572 14.5    0.192    0.204    1.901    2.060
 density_rs2pw                      119  9.7    0.004    0.004    1.983    2.060
 pw_transfer                       1439 11.6    0.065    0.068    1.926    1.937
 grid_integrate_task_list           119 12.3    1.813    1.895    1.813    1.895
 calculate_dm_sparse                119  9.5    0.000    0.001    1.848    1.856
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.833    1.847
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.816    1.830
 cp_fm_diag_elpa_base                50 14.0    1.791    1.806    1.814    1.828
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.808    1.810
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.795    1.805
 acc_transpose_blocks             18288 15.5    0.080    0.082    1.681    1.770
 prepare_preconditioner              11  7.9    0.000    0.000    1.700    1.702
 make_preconditioner                 11  8.9    0.000    0.001    1.700    1.702
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.636    1.642
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.559    1.641
 mp_sum_l                          7287 12.8    1.121    1.510    1.121    1.510
 potential_pw2rs                    119 12.3    0.007    0.008    1.490    1.499
 ot_diis_step                       108 11.5    0.011    0.011    1.493    1.494
 fft_wrap_pw1pw2_140                487 13.2    0.211    0.217    1.437    1.451
 fft3d_ps                          1201 14.6    0.529    0.547    1.402    1.414
 grid_collocate_task_list           119  9.7    1.235    1.318    1.235    1.318
 multiply_cannon_sync_h2d         18288 15.5    1.060    1.225    1.060    1.225
 wfi_extrapolate                     11  7.9    0.001    0.001    1.223    1.223
 transfer_rs2pw                     487 10.6    0.005    0.005    1.113    1.200
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.182    1.185
 make_images_data                  4572 15.5    0.048    0.052    0.844    1.036
 qs_energies_init_hamiltonians       11  5.9    0.001    0.005    0.997    1.005
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.968    0.985
 transfer_pw2rs                     487 13.2    0.004    0.004    0.927    0.937
 hybrid_alltoall_any               4725 16.4    0.058    0.116    0.728    0.923
 mp_alltoall_d11v                  2130 13.8    0.770    0.918    0.770    0.918
 acc_transpose_blocks_kernels     18288 16.5    0.217    0.224    0.840    0.892
 acc_transpose_blocks_sync        54864 16.5    0.742    0.824    0.742    0.824
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.799    0.800
 mp_alltoall_z22v                  1201 16.6    0.709    0.778    0.709    0.778
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.676    0.749
 cp_fm_cholesky_invert               11 10.9    0.723    0.726    0.723    0.726
 multiply_cannon_metrocomm1       18288 15.5    0.029    0.031    0.461    0.680
 jit_kernel_multiply                  3 17.0    0.021    0.677    0.021    0.677
 jit_kernel_transpose                 5 15.6    0.623    0.669    0.623    0.669
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=33.433000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=496.272727, yerr=1.212879
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             560.709632E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63489.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.032   36.193   36.195
 qs_mol_dyn_low                       1  2.0    0.003    0.004   36.001   36.009
 qs_forces                           11  3.9    0.002    0.003   35.845   35.846
 qs_energies                         11  4.9    0.001    0.002   34.147   34.154
 scf_env_do_scf                      11  5.9    0.000    0.001   28.926   28.928
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   25.613   25.614
 dbcsr_multiply_generic            2286 12.5    0.098    0.101   19.837   19.994
 velocity_verlet                     10  3.0    0.002    0.002   18.298   18.304
 qs_scf_new_mos                     108  7.5    0.001    0.001   17.664   17.726
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   17.663   17.725
 ot_scf_mini                        108  9.5    0.002    0.003   16.641   16.700
 multiply_cannon                   2286 13.5    0.218    0.227   16.138   16.639
 multiply_cannon_loop              2286 14.5    1.521    1.615   15.196   15.620
 ot_mini                            108 10.5    0.001    0.001   10.287   10.353
 multiply_cannon_multrec          27432 15.5    2.480    3.127    9.059    9.304
 qs_ot_get_derivative               108 11.5    0.001    0.001    8.435    8.495
 dbcsr_mm_accdrv_process          47916 15.9    6.069    7.558    6.477    7.770
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.233    6.277
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.232    6.277
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.517    5.556
 init_scf_run                        11  5.9    0.000    0.001    3.893    3.893
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.893    3.893
 qs_ot_get_p                        119 10.4    0.001    0.001    3.516    3.602
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.994    3.495
 sum_up_and_integrate               119 10.3    0.001    0.001    3.431    3.439
 integrate_v_rspace                 119 11.3    0.003    0.003    3.419    3.428
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.305    3.345
 calculate_rho_elec                 119  8.7    0.040    0.046    3.304    3.344
 init_scf_loop                       11  6.9    0.000    0.000    3.294    3.294
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.195    2.647
 apply_single                       119 13.6    0.000    0.000    2.194    2.647
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.511    2.513
 acc_transpose_blocks             27432 15.5    0.120    0.123    2.384    2.508
 prepare_preconditioner              11  7.9    0.000    0.000    2.477    2.486
 make_preconditioner                 11  8.9    0.000    0.000    2.477    2.486
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.093    2.412
 make_m2s                          4572 13.5    0.054    0.056    2.210    2.339
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.304    2.335
 make_images                       4572 14.5    0.273    0.335    2.103    2.231
 mp_waitall_1                    137007 16.6    1.719    2.208    1.719    2.208
 calculate_dm_sparse                119  9.5    0.000    0.000    2.151    2.202
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    2.144    2.157
 pw_transfer                       1439 11.6    0.065    0.069    1.894    1.929
 grid_integrate_task_list           119 12.3    1.830    1.912    1.830    1.912
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.900    1.900
 density_rs2pw                      119  9.7    0.004    0.004    1.754    1.851
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.802    1.840
 ot_diis_step                       108 11.5    0.012    0.012    1.811    1.811
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.746    1.746
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.719    1.730
 mp_sum_l                          7287 12.8    1.063    1.607    1.063    1.607
 fft_wrap_pw1pw2_140                487 13.2    0.245    0.256    1.498    1.537
 acc_transpose_blocks_sync        82296 16.5    1.413    1.536    1.413    1.536
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.509    1.519
 cp_fm_diag_elpa_base                50 14.0    1.476    1.492    1.507    1.518
 fft3d_ps                          1201 14.6    0.554    0.605    1.322    1.348
 grid_collocate_task_list           119  9.7    1.248    1.337    1.248    1.337
 wfi_extrapolate                     11  7.9    0.001    0.001    1.330    1.330
 multiply_cannon_metrocomm3       27432 15.5    0.040    0.041    0.750    1.306
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.275    1.294
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.255    1.264
 potential_pw2rs                    119 12.3    0.009    0.009    1.241    1.243
 cp_fm_upper_to_full                 72 14.2    0.805    1.141    0.805    1.141
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    1.070    1.071
 dbcsr_complete_redistribute        329 12.2    0.130    0.158    0.756    1.023
 transfer_rs2pw                     487 10.6    0.004    0.005    0.870    0.982
 make_images_data                  4572 15.5    0.048    0.052    0.817    0.944
 hybrid_alltoall_any               4725 16.4    0.066    0.156    0.696    0.894
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.799    0.875
 mp_alltoall_d11v                  2130 13.8    0.717    0.867    0.717    0.867
 acc_transpose_blocks_kernels     27432 16.5    0.269    0.277    0.823    0.834
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.560    0.821
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.804    0.809
 jit_kernel_multiply                  4 15.9    0.343    0.791    0.343    0.791
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=36.195000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=530.454545, yerr=3.822508
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             625.737728E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63488.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.030   28.935   28.937
 qs_mol_dyn_low                       1  2.0    0.003    0.004   28.764   28.774
 qs_forces                           11  3.9    0.002    0.003   28.706   28.706
 qs_energies                         11  4.9    0.002    0.002   27.015   27.017
 scf_env_do_scf                      11  5.9    0.000    0.001   22.281   22.281
 scf_env_do_scf_inner_loop          108  6.5    0.007    0.012   19.829   19.830
 velocity_verlet                     10  3.0    0.001    0.002   14.630   14.633
 dbcsr_multiply_generic            2286 12.5    0.091    0.096   13.520   13.582
 qs_scf_new_mos                     108  7.5    0.001    0.001   12.202   12.229
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   12.202   12.228
 ot_scf_mini                        108  9.5    0.002    0.002   11.492   11.518
 multiply_cannon                   2286 13.5    0.224    0.230   10.843   11.296
 multiply_cannon_loop              2286 14.5    0.640    0.658    9.921   10.116
 ot_mini                            108 10.5    0.001    0.001    6.773    6.802
 multiply_cannon_multrec           9144 15.5    1.867    2.067    6.298    6.515
 rebuild_ks_matrix                  119  8.3    0.000    0.000    5.735    5.759
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.014    5.734    5.758
 qs_ot_get_derivative               108 11.5    0.001    0.001    5.482    5.508
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.107    5.128
 dbcsr_mm_accdrv_process          12550 15.8    3.855    4.405    4.316    4.446
 sum_up_and_integrate               119 10.3    0.001    0.001    3.413    3.418
 integrate_v_rspace                 119 11.3    0.003    0.003    3.403    3.407
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.344    3.349
 calculate_rho_elec                 119  8.7    0.060    0.061    3.344    3.349
 init_scf_run                        11  5.9    0.000    0.001    3.303    3.303
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.303    3.303
 qs_ot_get_p                        119 10.4    0.001    0.001    2.845    2.876
 init_scf_loop                       11  6.9    0.000    0.000    2.431    2.433
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.155    2.157
 pw_transfer                       1439 11.6    0.066    0.067    1.968    1.978
 make_m2s                          4572 13.5    0.033    0.034    1.800    1.962
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.854    1.942
 grid_integrate_task_list           119 12.3    1.871    1.937    1.871    1.937
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.874    1.887
 make_images                       4572 14.5    0.269    0.300    1.713    1.872
 mp_waitall_1                    115863 16.7    1.463    1.854    1.463    1.854
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    1.844    1.846
 calculate_dm_sparse                119  9.5    0.000    0.000    1.803    1.822
 density_rs2pw                      119  9.7    0.003    0.004    1.717    1.814
 prepare_preconditioner              11  7.9    0.000    0.000    1.707    1.712
 make_preconditioner                 11  8.9    0.000    0.000    1.707    1.712
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.596    1.628
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.602    1.603
 fft_wrap_pw1pw2_140                487 13.2    0.322    0.329    1.544    1.555
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.547    1.548
 acc_transpose_blocks              9144 15.5    0.041    0.042    1.426    1.447
 grid_collocate_task_list           119  9.7    1.296    1.392    1.296    1.392
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.379    1.386
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.360    1.375
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.286    1.374
 apply_single                       119 13.6    0.000    0.000    1.286    1.373
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.317    1.325
 cp_fm_diag_elpa_base                50 14.0    1.290    1.307    1.316    1.324
 fft3d_ps                          1201 14.6    0.559    0.571    1.289    1.300
 ot_diis_step                       108 11.5    0.013    0.013    1.276    1.276
 potential_pw2rs                    119 12.3    0.010    0.011    1.227    1.229
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    1.217    1.217
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.188    1.193
 jit_kernel_multiply                  6 15.3    0.422    1.156    0.422    1.156
 wfi_extrapolate                     11  7.9    0.001    0.001    1.097    1.097
 hybrid_alltoall_any               4725 16.4    0.065    0.174    0.779    1.023
 make_images_data                  4572 15.5    0.042    0.045    0.782    0.972
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.876    0.925
 mp_alltoall_d11v                  2130 13.8    0.793    0.881    0.793    0.881
 transfer_rs2pw                     487 10.6    0.004    0.004    0.789    0.872
 cp_fm_cholesky_invert               11 10.9    0.842    0.845    0.842    0.845
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.816    0.824
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.748    0.751
 acc_transpose_blocks_sync        27432 16.5    0.719    0.740    0.719    0.740
 qs_env_update_s_mstruct             11  6.9    0.001    0.002    0.663    0.711
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.020    0.381    0.672
 mp_allgather_i34                  2286 14.5    0.238    0.671    0.238    0.671
 mp_sum_l                          7287 12.8    0.431    0.670    0.431    0.670
 acc_transpose_blocks_kernels      9144 16.5    0.117    0.120    0.651    0.662
 transfer_pw2rs                     487 13.2    0.003    0.004    0.660    0.662
 mp_alltoall_z22v                  1201 16.6    0.604    0.637    0.604    0.637
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=28.937000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=593.272727, yerr=5.593680
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             772.837376E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63723.
 MP_Allreduce        10154                    429.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.015    0.050   42.854   42.855
 qs_mol_dyn_low                       1  2.0    0.003    0.004   42.621   42.630
 qs_forces                           11  3.9    0.002    0.002   42.560   42.561
 qs_energies                         11  4.9    0.002    0.002   40.522   40.525
 scf_env_do_scf                      11  5.9    0.001    0.001   34.904   34.904
 scf_env_do_scf_inner_loop          108  6.5    0.052    0.095   26.969   26.970
 velocity_verlet                     10  3.0    0.002    0.002   24.135   24.140
 dbcsr_multiply_generic            2286 12.5    0.099    0.101   19.220   19.525
 qs_scf_new_mos                     108  7.5    0.001    0.001   17.545   17.685
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   17.545   17.684
 ot_scf_mini                        108  9.5    0.002    0.002   16.368   16.513
 multiply_cannon                   2286 13.5    0.297    0.303   15.227   16.144
 multiply_cannon_loop              2286 14.5    0.861    0.881   13.967   14.926
 ot_mini                            108 10.5    0.001    0.001   10.075   10.240
 multiply_cannon_multrec           9144 15.5    3.386    4.693    8.873    9.043
 qs_ot_get_derivative               108 11.5    0.001    0.001    8.017    8.162
 init_scf_loop                       11  6.9    0.000    0.000    7.907    7.908
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.109    7.252
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    7.109    7.252
 prepare_preconditioner              11  7.9    0.000    0.000    6.866    6.879
 make_preconditioner                 11  8.9    0.000    0.000    6.866    6.879
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.397    6.760
 dbcsr_mm_accdrv_process          12550 15.8    4.570    6.708    5.348    6.740
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.392    6.523
 cp_fm_upper_to_full                 72 14.2    3.182    4.598    3.182    4.598
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.023    4.031
 calculate_rho_elec                 119  8.7    0.118    0.120    4.022    4.030
 sum_up_and_integrate               119 10.3    0.001    0.001    3.793    3.799
 integrate_v_rspace                 119 11.3    0.003    0.004    3.783    3.788
 qs_ot_get_p                        119 10.4    0.001    0.001    3.447    3.585
 init_scf_run                        11  5.9    0.000    0.001    3.549    3.549
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.549    3.549
 mp_waitall_1                     94719 16.7    2.399    3.464    2.399    3.464
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.077    3.462
 dbcsr_complete_redistribute        329 12.2    0.284    0.287    2.010    2.876
 pw_transfer                       1439 11.6    0.069    0.070    2.583    2.589
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.704    2.554
 make_m2s                          4572 13.5    0.037    0.037    2.371    2.526
 fft_wrap_pw1pw2                   1201 12.6    0.009    0.009    2.485    2.491
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.180    2.481
 apply_single                       119 13.6    0.000    0.000    2.180    2.481
 multiply_cannon_metrocomm3        9144 15.5    0.021    0.021    1.449    2.444
 make_images                       4572 14.5    0.353    0.386    2.251    2.406
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.464    2.307
 mp_alltoall_i22                    627 13.8    1.441    2.294    1.441    2.294
 calculate_dm_sparse                119  9.5    0.000    0.000    2.269    2.287
 grid_integrate_task_list           119 12.3    2.081    2.157    2.081    2.157
 fft_wrap_pw1pw2_140                487 13.2    0.575    0.578    2.100    2.108
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.045    2.096
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.075    2.076
 density_rs2pw                      119  9.7    0.004    0.004    2.036    2.068
 ot_diis_step                       108 11.5    0.014    0.014    2.030    2.030
 qs_ot_p2m_diag                      50 11.0    0.043    0.044    1.978    1.980
 mp_sum_l                          7287 12.8    1.105    1.897    1.105    1.897
 acc_transpose_blocks              9144 15.5    0.044    0.044    1.811    1.847
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.810    1.811
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.779    1.780
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.622    1.622
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.556    1.598
 fft3d_ps                          1201 14.6    0.594    0.605    1.582    1.587
 grid_collocate_task_list           119  9.7    1.523    1.570    1.523    1.570
 cp_fm_cholesky_invert               11 10.9    1.443    1.446    1.443    1.446
 wfi_extrapolate                     11  7.9    0.001    0.001    1.410    1.410
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.333    1.333
 cp_fm_diag_elpa_base                50 14.0    1.187    1.241    1.331    1.331
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.308    1.329
 potential_pw2rs                    119 12.3    0.014    0.014    1.267    1.269
 hybrid_alltoall_any               4725 16.4    0.090    0.150    1.078    1.268
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.259    1.266
 mp_alltoall_d11v                  2130 13.8    1.227    1.264    1.227    1.264
 make_images_data                  4572 15.5    0.046    0.050    1.010    1.209
 acc_transpose_blocks_sync        27432 16.5    1.103    1.140    1.103    1.140
 qs_env_update_s_mstruct             11  6.9    0.005    0.022    1.091    1.103
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.007    1.060
 jit_kernel_multiply                  6 15.5    0.749    1.046    0.749    1.046
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.928    0.941
 qs_create_task_list                 11  7.9    0.001    0.001    0.928    0.939
 generate_qs_task_list               11  8.9    0.367    0.387    0.927    0.939
 mp_alltoall_z22v                  1201 16.6    0.851    0.870    0.851    0.870
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=42.855000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=721.272727, yerr=15.719651
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             502.059008E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66212.
 MP_Allreduce         9776                    488.
 MP_Sync                52
 MP_Alltoall          1938                1383689.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.034    0.081   81.840   81.841
 qs_mol_dyn_low                       1  2.0    0.004    0.009   81.413   81.481
 qs_forces                           11  3.9    0.003    0.005   81.274   81.275
 qs_energies                         11  4.9    0.003    0.008   78.382   78.396
 scf_env_do_scf                      11  5.9    0.001    0.002   69.386   69.388
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.008   63.848   63.848
 dbcsr_multiply_generic            2055 12.4    0.106    0.108   50.646   50.932
 qs_scf_new_mos                      99  7.5    0.000    0.001   46.778   46.912
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   46.777   46.912
 ot_scf_mini                         99  9.5    0.002    0.003   44.395   44.475
 multiply_cannon                   2055 13.4    0.185    0.190   42.445   43.415
 velocity_verlet                     10  3.0    0.001    0.003   43.134   43.136
 multiply_cannon_loop              2055 14.4    1.797    1.842   41.456   42.436
 ot_mini                             99 10.5    0.001    0.001   26.321   26.417
 qs_ot_get_derivative                99 11.5    0.002    0.010   19.524   19.622
 multiply_cannon_multrec          49320 15.4   11.425   12.102   17.525   18.143
 rebuild_ks_matrix                  110  8.3    0.000    0.000   14.536   14.635
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.012   14.536   14.634
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.723   12.805
 mp_waitall_1                    220248 16.4   10.478   11.345   10.478   11.345
 multiply_cannon_sync_h2d         49320 15.4    9.647   10.231    9.647   10.231
 qs_ot_get_p                        110 10.4    0.001    0.001    9.666    9.787
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    7.550    8.076
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.231    7.798
 apply_single                       110 13.6    0.000    0.000    7.230    7.798
 multiply_cannon_metrocomm3       49320 15.4    0.084    0.088    6.303    7.432
 sum_up_and_integrate               110 10.3    0.002    0.003    7.022    7.038
 integrate_v_rspace                 110 11.3    0.003    0.003    6.997    7.018
 init_scf_run                        11  5.9    0.000    0.001    6.899    6.899
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    6.899    6.899
 ot_diis_step                        99 11.5    0.006    0.008    6.569    6.570
 qs_ot_p2m_diag                      48 11.0    0.012    0.019    6.513    6.533
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.386    6.532
 calculate_rho_elec                 110  8.6    0.021    0.025    6.386    6.531
 dbcsr_mm_accdrv_process          87628 16.1    3.036    3.144    5.968    6.272
 cp_dbcsr_syevd                      48 12.0    0.002    0.003    5.682    5.682
 init_scf_loop                       11  6.9    0.001    0.004    5.508    5.509
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    5.243    5.291
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    5.114    5.137
 cp_fm_diag_elpa_base                48 14.0    5.101    5.126    5.112    5.135
 mp_sum_l                          6594 12.7    3.869    4.720    3.869    4.720
 wfi_extrapolate                     11  7.9    0.001    0.001    4.057    4.058
 make_m2s                          4110 13.4    0.060    0.065    3.915    4.028
 calculate_dm_sparse                110  9.5    0.001    0.001    3.884    3.973
 make_images                       4110 14.4    0.178    0.190    3.820    3.936
 density_rs2pw                      110  9.6    0.004    0.005    3.418    3.640
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    3.540    3.544
 grid_integrate_task_list           110 12.3    3.265    3.417    3.265    3.417
 prepare_preconditioner              11  7.9    0.000    0.000    3.327    3.349
 make_preconditioner                 11  8.9    0.000    0.002    3.327    3.349
 multiply_cannon_metrocomm1       49320 15.4    0.066    0.069    2.301    3.234
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.192    3.228
 pw_transfer                       1331 11.6    0.055    0.065    3.100    3.183
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    3.108    3.171
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.104    3.155
 fft_wrap_pw1pw2                   1111 12.6    0.007    0.008    3.012    3.096
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.739    2.747
 jit_kernel_multiply                 13 15.9    2.655    2.707    2.655    2.707
 fft_wrap_pw1pw2_140                451 13.1    0.390    0.428    2.567    2.658
 potential_pw2rs                    110 12.3    0.005    0.006    2.615    2.637
 acc_transpose_blocks             49320 15.4    0.215    0.222    2.564    2.637
 mp_alltoall_d11v                  2046 13.8    2.074    2.578    2.074    2.578
 fft3d_ps                          1111 14.6    0.793    0.881    2.308    2.376
 grid_collocate_task_list           110  9.6    2.156    2.242    2.156    2.242
 transfer_rs2pw                     451 10.6    0.005    0.006    1.971    2.169
 mp_waitany                       14300 13.8    1.832    2.079    1.832    2.079
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.944    1.963
 make_images_data                  4110 15.4    0.043    0.047    1.766    1.892
 mp_sum_d                          3889 11.9    1.351    1.881    1.351    1.881
 cp_fm_cholesky_invert               11 10.9    1.810    1.814    1.810    1.814
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.785    1.811
 transfer_pw2rs                     451 13.1    0.006    0.007    1.777    1.791
 hybrid_alltoall_any               4261 16.3    0.085    0.486    1.526    1.775
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=81.841000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=476.454545, yerr=2.807899
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             595.058688E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66430.
 MP_Allreduce         9775                    566.
 MP_Sync                52
 MP_Alltoall          1717                2862360.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.007    0.029   68.918   68.919
 qs_mol_dyn_low                       1  2.0    0.003    0.004   68.710   68.721
 qs_forces                           11  3.9    0.003    0.004   68.615   68.615
 qs_energies                         11  4.9    0.001    0.002   65.308   65.311
 scf_env_do_scf                      11  5.9    0.000    0.001   56.859   56.863
 scf_env_do_scf_inner_loop           99  6.5    0.019    0.071   49.254   49.255
 dbcsr_multiply_generic            2055 12.4    0.115    0.119   38.244   38.462
 velocity_verlet                     10  3.0    0.001    0.002   36.036   36.037
 qs_scf_new_mos                      99  7.5    0.001    0.001   33.425   33.567
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   33.424   33.566
 multiply_cannon                   2055 13.4    0.225    0.245   31.624   32.622
 ot_scf_mini                         99  9.5    0.003    0.003   31.759   31.900
 multiply_cannon_loop              2055 14.4    1.170    1.195   30.359   31.479
 ot_mini                             99 10.5    0.001    0.001   18.761   18.911
 multiply_cannon_multrec          24660 15.4    6.988    8.746   14.120   15.602
 rebuild_ks_matrix                  110  8.3    0.000    0.000   13.512   13.649
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   13.511   13.648
 qs_ot_get_derivative                99 11.5    0.001    0.001   12.932   13.081
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.888   12.007
 mp_waitall_1                    176588 16.5    7.683   10.338    7.683   10.338
 multiply_cannon_metrocomm3       24660 15.4    0.072    0.074    5.330    8.182
 init_scf_loop                       11  6.9    0.000    0.000    7.571    7.571
 dbcsr_mm_accdrv_process          52282 16.1    5.384    6.525    6.961    7.393
 multiply_cannon_sync_h2d         24660 15.4    6.398    7.345    6.398    7.345
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.581    7.256
 apply_single                       110 13.6    0.000    0.001    6.581    7.255
 qs_ot_get_p                        110 10.4    0.001    0.001    6.404    6.579
 sum_up_and_integrate               110 10.3    0.001    0.003    6.358    6.370
 integrate_v_rspace                 110 11.3    0.003    0.003    6.331    6.346
 init_scf_run                        11  5.9    0.000    0.001    6.083    6.084
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    6.083    6.084
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.859    5.867
 calculate_rho_elec                 110  8.6    0.039    0.047    5.858    5.867
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    5.027    5.835
 ot_diis_step                        99 11.5    0.010    0.010    5.782    5.782
 prepare_preconditioner              11  7.9    0.000    0.000    5.503    5.520
 make_preconditioner                 11  8.9    0.000    0.000    5.503    5.520
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.069    5.220
 make_m2s                          4110 13.4    0.057    0.060    4.150    4.615
 make_images                       4110 14.4    0.407    0.465    4.041    4.502
 qs_ot_p2m_diag                      48 11.0    0.029    0.044    4.467    4.488
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    4.008    4.008
 pw_transfer                       1331 11.6    0.066    0.072    3.519    3.657
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.412    3.553
 wfi_extrapolate                     11  7.9    0.001    0.001    3.513    3.513
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.448    3.461
 cp_fm_diag_elpa_base                48 14.0    3.403    3.417    3.446    3.458
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.252    3.327
 grid_integrate_task_list           110 12.3    3.169    3.312    3.169    3.312
 density_rs2pw                      110  9.6    0.004    0.005    3.109    3.294
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.213    3.214
 fft_wrap_pw1pw2_140                451 13.1    0.460    0.476    2.924    3.065
 calculate_dm_sparse                110  9.5    0.001    0.001    3.003    3.033
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.874    2.933
 hybrid_alltoall_any               4261 16.3    0.107    0.453    1.992    2.806
 make_images_data                  4110 15.4    0.049    0.053    2.264    2.753
 fft3d_ps                          1111 14.6    1.114    1.344    2.507    2.655
 mp_sum_l                          6594 12.7    1.836    2.636    1.836    2.636
 cp_fm_cholesky_invert               11 10.9    2.550    2.557    2.550    2.557
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.475    2.477
 jit_kernel_multiply                 11 16.2    1.213    2.317    1.213    2.317
 grid_collocate_task_list           110  9.6    2.166    2.285    2.166    2.285
 potential_pw2rs                    110 12.3    0.008    0.009    2.239    2.255
 acc_transpose_blocks             24660 15.4    0.116    0.120    1.982    2.040
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.970    1.995
 mp_alltoall_d11v                  2046 13.8    1.759    1.951    1.759    1.951
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    1.857    1.858
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.780    1.795
 multiply_cannon_metrocomm4       22605 15.4    0.078    0.082    0.780    1.621
 transfer_rs2pw                     451 10.6    0.007    0.008    1.423    1.593
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.572    1.582
 mp_allgather_i34                  2055 14.4    0.572    1.565    0.572    1.565
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.386    1.487
 dbcsr_complete_redistribute        325 12.2    0.240    0.301    1.140    1.405
 mp_irecv_dv                      57340 16.2    0.652    1.402    0.652    1.402
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=68.919000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=561.636364, yerr=6.732185
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             665.370624E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66421.
 MP_Allreduce         9774                    562.
 MP_Sync                52
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.028   60.060   60.061
 qs_mol_dyn_low                       1  2.0    0.004    0.005   59.749   59.818
 qs_forces                           11  3.9    0.003    0.003   59.411   59.414
 qs_energies                         11  4.9    0.002    0.002   56.262   56.268
 scf_env_do_scf                      11  5.9    0.000    0.001   48.406   48.406
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   39.836   39.836
 velocity_verlet                     10  3.0    0.001    0.002   32.694   32.704
 dbcsr_multiply_generic            2055 12.4    0.107    0.113   29.048   29.273
 qs_scf_new_mos                      99  7.5    0.001    0.001   25.339   25.430
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   25.339   25.429
 ot_scf_mini                         99  9.5    0.002    0.003   24.092   24.204
 multiply_cannon                   2055 13.4    0.215    0.223   22.966   24.115
 multiply_cannon_loop              2055 14.4    0.817    0.843   21.785   22.857
 ot_mini                             99 10.5    0.001    0.001   14.050   14.161
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.063   12.183
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   12.063   12.183
 multiply_cannon_multrec          16440 15.4    3.652    4.819    9.906   11.026
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.617   10.729
 mp_waitall_1                    139946 16.5    7.229   10.383    7.229   10.383
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.566    9.678
 init_scf_loop                       11  6.9    0.000    0.000    8.535    8.536
 multiply_cannon_metrocomm3       16440 15.4    0.046    0.048    4.692    7.634
 prepare_preconditioner              11  7.9    0.000    0.000    6.781    6.799
 make_preconditioner                 11  8.9    0.000    0.000    6.780    6.799
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.087    6.450
 dbcsr_mm_accdrv_process          34862 16.1    5.378    5.806    6.098    6.319
 sum_up_and_integrate               110 10.3    0.001    0.002    6.217    6.232
 integrate_v_rspace                 110 11.3    0.003    0.003    6.191    6.207
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.665    5.675
 calculate_rho_elec                 110  8.6    0.059    0.059    5.665    5.674
 init_scf_run                        11  5.9    0.000    0.001    5.467    5.467
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.466    5.466
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.959    5.443
 apply_single                       110 13.6    0.000    0.000    4.959    5.442
 qs_ot_get_p                        110 10.4    0.001    0.001    5.220    5.356
 make_m2s                          4110 13.4    0.049    0.051    4.106    4.509
 ot_diis_step                        99 11.5    0.011    0.011    4.452    4.453
 make_images                       4110 14.4    0.398    0.520    3.992    4.395
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.585    4.197
 multiply_cannon_sync_h2d         16440 15.4    3.242    3.972    3.242    3.972
 qs_ot_p2m_diag                      48 11.0    0.042    0.044    3.631    3.634
 pw_transfer                       1331 11.6    0.066    0.073    3.516    3.525
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.409    3.420
 grid_integrate_task_list           110 12.3    3.205    3.387    3.205    3.387
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.266    3.266
 density_rs2pw                      110  9.6    0.004    0.005    2.903    3.105
 fft_wrap_pw1pw2_140                451 13.1    0.578    0.586    2.957    2.971
 wfi_extrapolate                     11  7.9    0.001    0.001    2.967    2.968
 make_images_data                  4110 15.4    0.046    0.050    2.323    2.823
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.762    2.763
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.744    2.756
 cp_fm_diag_elpa_base                48 14.0    2.679    2.714    2.742    2.754
 hybrid_alltoall_any               4261 16.3    0.110    0.387    2.066    2.728
 cp_fm_cholesky_invert               11 10.9    2.658    2.664    2.658    2.664
 calculate_dm_sparse                110  9.5    0.001    0.001    2.540    2.563
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.443    2.490
 multiply_cannon_metrocomm4       14385 15.4    0.049    0.053    0.892    2.479
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.388    2.448
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.409    2.410
 fft3d_ps                          1111 14.6    1.091    1.104    2.351    2.366
 grid_collocate_task_list           110  9.6    2.224    2.350    2.224    2.350
 mp_irecv_dv                      48980 15.7    0.816    2.342    0.816    2.342
 potential_pw2rs                    110 12.3    0.011    0.012    2.069    2.074
 mp_alltoall_d11v                  2046 13.8    1.766    2.019    1.766    2.019
 mp_sum_l                          6594 12.7    1.351    1.963    1.351    1.963
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    1.958    1.962
 dbcsr_complete_redistribute        325 12.2    0.331    0.365    1.440    1.912
 acc_transpose_blocks             16440 15.4    0.077    0.078    1.602    1.812
 cp_fm_upper_to_full                 70 14.2    1.399    1.768    1.399    1.768
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.637    1.652
 cp_fm_cholesky_decompose            22 10.9    1.569    1.589    1.569    1.589
 mp_allgather_i34                  2055 14.4    0.491    1.547    0.491    1.547
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.478    1.491
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.371    1.456
 transfer_rs2pw                     451 10.6    0.005    0.006    1.238    1.436
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    0.981    1.430
 multiply_cannon_metrocomm1       16440 15.4    0.029    0.030    0.337    1.354
 mp_waitany                       17072 13.8    1.119    1.328    1.119    1.328
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    1.279    1.288
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=60.061000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=629.818182, yerr=9.123578
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             731.508736E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66419.
 MP_Allreduce         9774                    603.
 MP_Sync                52
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.009    0.028   65.743   65.744
 qs_mol_dyn_low                       1  2.0    0.003    0.004   65.528   65.538
 qs_forces                           11  3.9    0.003    0.003   65.449   65.451
 qs_energies                         11  4.9    0.002    0.003   62.071   62.075
 scf_env_do_scf                      11  5.9    0.000    0.001   53.740   53.743
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   41.971   41.972
 velocity_verlet                     10  3.0    0.011    0.020   37.192   37.195
 dbcsr_multiply_generic            2055 12.4    0.114    0.120   30.593   30.789
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.470   27.576
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   27.470   27.576
 ot_scf_mini                         99  9.5    0.002    0.003   25.780   25.894
 multiply_cannon                   2055 13.4    0.239    0.256   23.540   24.758
 multiply_cannon_loop              2055 14.4    1.408    1.463   22.188   22.825
 ot_mini                             99 10.5    0.001    0.001   14.778   14.918
 multiply_cannon_multrec          24660 15.4    4.073    6.771   13.088   14.142
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.040   12.154
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.020   12.040   12.153
 init_scf_loop                       11  6.9    0.000    0.000   11.727   11.728
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.658   10.760
 qs_ot_get_derivative                99 11.5    0.001    0.002   10.581   10.701
 prepare_preconditioner              11  7.9    0.000    0.000    9.840    9.854
 make_preconditioner                 11  8.9    0.000    0.000    9.840    9.854
 dbcsr_mm_accdrv_process          52304 16.0    7.876    9.264    8.857    9.782
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.084    9.534
 sum_up_and_integrate               110 10.3    0.001    0.002    6.224    6.248
 integrate_v_rspace                 110 11.3    0.003    0.003    6.198    6.220
 mp_waitall_1                    121746 16.5    4.162    6.143    4.162    6.143
 qs_ot_get_p                        110 10.4    0.001    0.001    5.895    6.058
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.796    5.807
 calculate_rho_elec                 110  8.6    0.078    0.081    5.796    5.806
 make_m2s                          4110 13.4    0.059    0.062    5.402    5.669
 init_scf_run                        11  5.9    0.000    0.001    5.598    5.598
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.598    5.598
 make_images                       4110 14.4    0.577    0.696    5.261    5.523
 cp_fm_upper_to_full                 70 14.2    3.347    4.846    3.347    4.846
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.067    4.172
 apply_single                       110 13.6    0.000    0.000    4.067    4.172
 ot_diis_step                        99 11.5    0.011    0.011    4.156    4.156
 qs_ot_p2m_diag                      48 11.0    0.055    0.064    4.034    4.050
 dbcsr_complete_redistribute        325 12.2    0.414    0.456    2.619    3.756
 pw_transfer                       1331 11.6    0.066    0.075    3.655    3.699
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.548    3.596
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.518    3.583
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.470    3.470
 grid_integrate_task_list           110 12.3    3.288    3.458    3.288    3.458
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.341    3.399
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    2.108    3.224
 multiply_cannon_metrocomm3       24660 15.4    0.038    0.039    1.422    3.158
 calculate_dm_sparse                110  9.5    0.001    0.001    3.112    3.150
 fft_wrap_pw1pw2_140                451 13.1    0.609    0.630    3.049    3.099
 density_rs2pw                      110  9.6    0.004    0.004    2.918    3.086
 wfi_extrapolate                     11  7.9    0.001    0.001    2.983    2.983
 make_images_data                  4110 15.4    0.049    0.053    2.651    2.945
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.922    2.934
 cp_fm_diag_elpa_base                48 14.0    2.776    2.829    2.920    2.932
 hybrid_alltoall_any               4261 16.3    0.123    0.457    2.286    2.886
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.746    2.839
 mp_alltoall_i22                    605 13.7    1.655    2.839    1.655    2.839
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.788    2.790
 cp_fm_cholesky_invert               11 10.9    2.679    2.688    2.679    2.688
 multiply_cannon_sync_h2d         24660 15.4    2.366    2.556    2.366    2.556
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.515    2.518
 acc_transpose_blocks             24660 15.4    0.114    0.116    2.398    2.497
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.432    2.476
 grid_collocate_task_list           110  9.6    2.263    2.465    2.263    2.465
 fft3d_ps                          1111 14.6    1.085    1.112    2.432    2.460
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    2.229    2.230
 potential_pw2rs                    110 12.3    0.012    0.013    2.041    2.054
 mp_alltoall_d11v                  2046 13.8    1.774    1.984    1.774    1.984
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.809    1.847
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.614    1.712
 cp_fm_cholesky_decompose            22 10.9    1.652    1.701    1.652    1.701
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.667    1.685
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.625    1.634
 mp_sum_l                          6594 12.7    0.962    1.617    0.962    1.617
 mp_allgather_i34                  2055 14.4    0.438    1.529    0.438    1.529
 acc_transpose_blocks_sync        73980 16.4    1.391    1.483    1.391    1.483
 multiply_cannon_metrocomm4       20550 15.4    0.062    0.066    0.865    1.434
 transfer_rs2pw                     451 10.6    0.005    0.006    1.176    1.367
 mp_irecv_dv                      62702 16.1    0.760    1.354    0.760    1.354
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=65.744000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=692.000000, yerr=7.804428
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             860.758016E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66417.
 MP_Allreduce         9774                    644.
 MP_Sync                52
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.081    0.129   56.959   56.967
 qs_mol_dyn_low                       1  2.0    0.003    0.004   55.939   55.949
 qs_forces                           11  3.9    0.003    0.003   55.808   55.808
 qs_energies                         11  4.9    0.002    0.002   52.158   52.160
 scf_env_do_scf                      11  5.9    0.000    0.001   43.837   43.837
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   36.013   36.014
 velocity_verlet                     10  3.0    0.058    0.071   31.663   31.666
 dbcsr_multiply_generic            2055 12.4    0.106    0.111   23.609   23.715
 qs_scf_new_mos                      99  7.5    0.001    0.001   21.433   21.487
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   21.433   21.486
 ot_scf_mini                         99  9.5    0.002    0.003   20.171   20.219
 multiply_cannon                   2055 13.4    0.240    0.248   17.815   19.082
 multiply_cannon_loop              2055 14.4    0.603    0.629   16.509   16.730
 rebuild_ks_matrix                  110  8.3    0.000    0.000   11.661   11.697
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.014   11.661   11.697
 ot_mini                             99 10.5    0.001    0.001   11.054   11.096
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.372   10.403
 multiply_cannon_multrec           8220 15.4    3.172    4.532    7.555    8.590
 mp_waitall_1                    103326 16.6    6.109    7.875    6.109    7.875
 init_scf_loop                       11  6.9    0.000    0.000    7.776    7.777
 qs_ot_get_derivative                99 11.5    0.001    0.001    7.286    7.335
 sum_up_and_integrate               110 10.3    0.001    0.002    6.237    6.250
 integrate_v_rspace                 110 11.3    0.003    0.003    6.211    6.224
 prepare_preconditioner              11  7.9    0.000    0.000    6.102    6.106
 make_preconditioner                 11  8.9    0.000    0.000    6.101    6.105
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.003    6.014
 calculate_rho_elec                 110  8.6    0.113    0.114    6.002    6.013
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.669    5.737
 qs_ot_get_p                        110 10.4    0.001    0.001    5.095    5.161
 dbcsr_mm_accdrv_process          17442 15.9    3.069    4.122    4.242    5.154
 init_scf_run                        11  5.9    0.000    0.001    5.149    5.149
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.149    5.149
 make_m2s                          4110 13.4    0.038    0.039    4.357    4.592
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.020    3.151    4.498
 make_images                       4110 14.4    0.648    0.707    4.228    4.460
 pw_transfer                       1331 11.6    0.066    0.069    3.889    3.898
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.786    3.817
 apply_single                       110 13.6    0.000    0.000    3.785    3.816
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.781    3.793
 ot_diis_step                        99 11.5    0.012    0.012    3.741    3.741
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.658    3.662
 grid_integrate_task_list           110 12.3    3.395    3.506    3.395    3.506
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.306    3.307
 fft_wrap_pw1pw2_140                451 13.1    0.777    0.789    3.246    3.258
 density_rs2pw                      110  9.6    0.004    0.004    2.971    3.145
 cp_fm_cholesky_invert               11 10.9    2.951    2.954    2.951    2.954
 wfi_extrapolate                     11  7.9    0.001    0.001    2.798    2.798
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.712    2.720
 cp_fm_diag_elpa_base                48 14.0    2.655    2.682    2.711    2.718
 hybrid_alltoall_any               4261 16.3    0.200    0.849    2.311    2.697
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    2.683    2.683
 make_images_data                  4110 15.4    0.041    0.046    2.309    2.654
 calculate_dm_sparse                110  9.5    0.001    0.001    2.492    2.531
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.528    2.529
 grid_collocate_task_list           110  9.6    2.369    2.490    2.369    2.490
 multiply_cannon_sync_h2d          8220 15.4    2.353    2.477    2.353    2.477
 fft3d_ps                          1111 14.6    1.132    1.154    2.447    2.461
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.252    2.254
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.143    2.156
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.086    2.124
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.025    2.051
 potential_pw2rs                    110 12.3    0.015    0.015    2.007    2.012
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.788    1.990
 mp_alltoall_d11v                  2046 13.8    1.820    1.945    1.820    1.945
 cp_fm_cholesky_decompose            22 10.9    1.689    1.717    1.689    1.717
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.620    1.626
 qs_env_update_s_mstruct             11  6.9    0.012    0.018    1.509    1.615
 mp_allgather_i34                  2055 14.4    0.474    1.592    0.474    1.592
 dbcsr_complete_redistribute        325 12.2    0.556    0.573    1.496    1.584
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.437    1.449
 acc_transpose_blocks              8220 15.4    0.039    0.040    1.362    1.400
 transfer_rs2pw                     451 10.6    0.005    0.005    1.145    1.313
 qs_create_task_list                 11  7.9    0.001    0.001    1.214    1.310
 generate_qs_task_list               11  8.9    0.373    0.442    1.213    1.310
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.255    1.280
 multiply_cannon_metrocomm4        6165 15.4    0.019    0.020    0.469    1.261
 mp_waitany                        9240 13.8    1.056    1.235    1.056    1.235
 mp_irecv_dv                      24056 15.7    0.444    1.216    0.444    1.216
 jit_kernel_multiply                  7 15.5    0.861    1.203    0.861    1.203
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=56.967000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=810.090909, yerr=13.714323
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.419780E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67098.
 MP_Allreduce         9752                    812.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.025    0.047   87.551   87.555
 qs_mol_dyn_low                       1  2.0    0.003    0.003   87.194   87.225
 qs_forces                           11  3.9    0.003    0.003   87.125   87.126
 qs_energies                         11  4.9    0.024    0.033   83.030   83.032
 scf_env_do_scf                      11  5.9    0.000    0.001   72.882   72.882
 velocity_verlet                     10  3.0    0.002    0.002   55.821   55.827
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   44.605   44.606
 dbcsr_multiply_generic            2055 12.4    0.120    0.126   29.961   30.111
 init_scf_loop                       11  6.9    0.000    0.000   28.204   28.208
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.341   27.440
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   27.340   27.439
 prepare_preconditioner              11  7.9    0.000    0.000   26.200   26.213
 make_preconditioner                 11  8.9    0.000    0.000   26.200   26.213
 make_full_inverse_cholesky          11  9.9    0.000    0.000   20.572   25.670
 ot_scf_mini                         99  9.5    0.002    0.002   25.533   25.617
 multiply_cannon                   2055 13.4    0.332    0.360   22.690   23.347
 multiply_cannon_loop              2055 14.4    0.830    0.855   20.830   21.220
 cp_fm_upper_to_full                 70 14.2   12.657   18.003   12.657   18.003
 ot_mini                             99 10.5    0.001    0.001   14.468   14.550
 rebuild_ks_matrix                  110  8.3    0.000    0.001   13.578   13.675
 qs_ks_build_kohn_sham_matrix       110  9.3    0.014    0.014   13.578   13.675
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.311   12.398
 dbcsr_complete_redistribute        325 12.2    1.024    1.050    7.224   10.277
 multiply_cannon_multrec           8220 15.4    4.132    4.320    9.804    9.885
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.681    9.768
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.195    9.245
 mp_waitall_1                     84994 16.7    7.685    8.720    7.685    8.720
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    5.613    8.640
 mp_alltoall_i22                    605 13.7    5.252    8.314    5.252    8.314
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.086    7.136
 calculate_rho_elec                 110  8.6    0.223    0.224    7.085    7.135
 sum_up_and_integrate               110 10.3    0.002    0.002    6.704    6.720
 integrate_v_rspace                 110 11.3    0.004    0.004    6.676    6.692
 make_m2s                          4110 13.4    0.043    0.044    5.400    5.920
 qs_ot_get_p                        110 10.4    0.001    0.001    5.727    5.829
 init_scf_run                        11  5.9    0.000    0.001    5.774    5.774
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    5.774    5.774
 make_images                       4110 14.4    0.882    0.938    5.212    5.731
 dbcsr_mm_accdrv_process          11614 15.7    3.886    4.106    5.523    5.713
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.878    5.402
 apply_single                       110 13.6    0.000    0.000    4.878    5.402
 cp_fm_cholesky_invert               11 10.9    5.372    5.376    5.372    5.376
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.020    4.982    5.305
 pw_transfer                       1331 11.6    0.075    0.075    4.920    4.925
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.009    4.804    4.809
 ot_diis_step                        99 11.5    0.015    0.016    4.768    4.768
 fft_wrap_pw1pw2_140                451 13.1    1.279    1.284    4.193    4.202
 qs_ot_p2m_diag                      48 11.0    0.151    0.156    4.069    4.075
 grid_integrate_task_list           110 12.3    3.673    3.727    3.673    3.727
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    3.703    3.704
 hybrid_alltoall_any               4261 16.3    0.263    0.566    2.847    3.568
 density_rs2pw                      110  9.6    0.004    0.004    3.537    3.548
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.547    3.547
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.062    3.535
 make_images_data                  4110 15.4    0.045    0.048    2.857    3.503
 wfi_extrapolate                     11  7.9    0.001    0.001    3.344    3.344
 calculate_dm_sparse                110  9.5    0.001    0.001    3.221    3.248
 multiply_cannon_sync_h2d          8220 15.4    3.113    3.135    3.113    3.135
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.953    2.954
 cp_fm_diag_elpa_base                48 14.0    2.415    2.606    2.951    2.951
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.909    2.914
 fft3d_ps                          1111 14.6    1.296    1.307    2.856    2.862
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.681    2.729
 grid_collocate_task_list           110  9.6    2.663    2.685    2.663    2.685
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.381    2.414
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.312    2.314
 qs_env_update_s_mstruct             11  6.9    0.046    0.047    2.215    2.267
 potential_pw2rs                    110 12.3    0.021    0.021    2.239    2.245
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.100    2.163
 mp_alltoall_d11v                  2046 13.8    2.065    2.120    2.065    2.120
 cp_fm_cholesky_decompose            22 10.9    2.015    2.038    2.015    2.038
 qs_create_task_list                 11  7.9    0.001    0.001    1.878    1.921
 generate_qs_task_list               11  8.9    0.731    0.783    1.878    1.920
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.865    1.875
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.778    1.830
 acc_transpose_blocks              8220 15.4    0.041    0.041    1.737    1.781
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=87.555000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1264.181818, yerr=57.167725
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             628.367360E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175954870160
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  57905.
 MP_Allreduce        11059                    797.
 MP_Sync                87
 MP_Alltoall          2226                2529110.
 MP_SendRecv         24320                  18752.
 MP_ISendRecv        24320                  18752.
 MP_Wait             42476
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.026  209.093  209.094
 qs_mol_dyn_low                       1  2.0    0.041    0.078  208.664  208.679
 qs_forces                           11  3.9    0.020    0.038  208.539  208.577
 qs_energies                         11  4.9    0.003    0.006  202.912  202.969
 scf_env_do_scf                      11  5.9    0.001    0.001  185.886  185.890
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  165.499  165.501
 dbcsr_multiply_generic            2507 12.6    0.180    0.183  126.388  127.638
 qs_scf_new_mos                     117  7.6    0.001    0.001  125.828  126.202
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  125.828  126.201
 velocity_verlet                     10  3.0    0.001    0.002  125.016  125.017
 ot_scf_mini                        117  9.6    0.003    0.003  119.115  119.496
 multiply_cannon                   2507 13.6    0.240    0.247  102.232  104.397
 multiply_cannon_loop              2507 14.6    2.407    2.458  100.052  102.219
 ot_mini                            117 10.6    0.001    0.001   66.780   67.125
 multiply_cannon_multrec          60168 15.6   32.024   35.008   41.816   44.033
 qs_ot_get_derivative               117 11.6    0.001    0.001   41.371   41.751
 rebuild_ks_matrix                  128  8.3    0.001    0.001   33.720   34.053
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.018   33.720   34.052
 mp_waitall_1                    267128 16.5   29.111   32.306   29.111   32.306
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.266   30.572
 qs_ot_get_p                        128 10.4    0.001    0.001   29.955   30.376
 multiply_cannon_sync_h2d         60168 15.6   26.549   28.757   26.549   28.757
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.694   25.799
 apply_single                       128 13.6    0.001    0.001   24.694   25.799
 ot_diis_step                       117 11.6    0.008    0.008   25.160   25.161
 qs_ot_p2m_diag                      83 11.4    0.079    0.091   23.302   23.375
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   20.368   20.369
 init_scf_loop                       11  6.9    0.000    0.001   20.315   20.316
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   19.677   19.961
 multiply_cannon_metrocomm3       60168 15.6    0.119    0.124   16.347   18.708
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   17.360   17.394
 cp_fm_diag_elpa_base                83 14.4   17.285   17.325   17.356   17.391
 prepare_preconditioner              11  7.9    0.000    0.000   15.726   15.789
 make_preconditioner                 11  8.9    0.000    0.000   15.726   15.789
 make_full_inverse_cholesky          11  9.9    0.000    0.000   14.959   15.185
 make_m2s                          5014 13.6    0.105    0.115   13.813   14.153
 sum_up_and_integrate               128 10.3    0.002    0.004   14.035   14.057
 integrate_v_rspace                 128 11.3    0.004    0.004   13.976   14.000
 make_images                       5014 14.6    0.397    0.416   13.630   13.985
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.280   13.442
 calculate_rho_elec                 128  8.7    0.045    0.064   13.279   13.441
 init_scf_run                        11  5.9    0.000    0.001   12.628   12.628
 scf_env_initial_rho_setup           11  6.9    0.003    0.004   12.627   12.628
 mp_sum_l                          7950 12.9    9.078   10.575    9.078   10.575
 dbcsr_mm_accdrv_process         124484 16.2    4.728    4.894    9.358    9.894
 wfi_extrapolate                     11  7.9    0.001    0.001    9.230    9.230
 cp_fm_cholesky_invert               11 10.9    8.926    8.933    8.926    8.933
 calculate_dm_sparse                128  9.5    0.001    0.001    8.548    8.662
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    8.286    8.446
 multiply_cannon_metrocomm1       60168 15.6    0.094    0.098    6.267    8.288
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    8.116    8.210
 pw_transfer                       1547 11.6    0.074    0.093    7.647    7.831
 make_images_data                  5014 15.6    0.067    0.072    6.722    7.645
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.012    7.446    7.633
 grid_integrate_task_list           128 12.3    6.992    7.500    6.992    7.500
 density_rs2pw                      128  9.7    0.006    0.007    6.916    7.425
 hybrid_alltoall_any               5200 16.5    0.297    2.275    5.903    7.237
 fft_wrap_pw1pw2_140                523 13.2    1.125    1.171    6.512    6.691
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.665    6.676
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.778    5.956
 fft3d_ps                          1291 14.7    2.193    2.847    5.452    5.785
 mp_alltoall_d11v                  2415 14.1    4.395    5.721    4.395    5.721
 grid_collocate_task_list           128  9.7    4.840    5.224    4.840    5.224
 cp_fm_cholesky_decompose            22 10.9    4.641    4.655    4.641    4.655
 mp_sum_d                          4465 12.1    3.706    4.592    3.706    4.592
 potential_pw2rs                    128 12.3    0.009    0.010    4.548    4.566
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=209.094000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=596.090909, yerr=5.299306
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022121472       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444702699520       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796573E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.166472E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499744       0.0%      0.0%    100.0%
 number of processed stacks               5925696       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1131.6
 marketing flops                   143.508480E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank             839.049216E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2385600
 MPI messages size (bytes):
  total size                         4.069300E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.705776E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               70188               2295595008
     32768 < size <=   131072              716032              54973693952
    131072 < size <=  4194304             1363760            1386318135296
   4194304 < size <= 16777216              153648            1453842923456
  16777216 < size                           67056            1171888537600
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4005                  58170.
 MP_Allreduce        11091                    960.
 MP_Sync                86
 MP_Alltoall          1955                4875857.
 MP_SendRecv         11938                  47072.
 MP_ISendRecv        11938                  47072.
 MP_Wait             25718
 MP_ISend            11660                 212488.
 MP_IRecv            11660                 212488.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.010    0.031  190.179  190.180
 qs_mol_dyn_low                       1  2.0    0.003    0.004  189.794  189.808
 qs_forces                           11  3.9    0.004    0.004  189.704  189.706
 qs_energies                         11  4.9    0.002    0.003  182.925  182.935
 scf_env_do_scf                      11  5.9    0.001    0.001  166.079  166.089
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  132.440  132.442
 velocity_verlet                     10  3.0    0.001    0.002  119.556  119.558
 dbcsr_multiply_generic            2485 12.5    0.189    0.196   97.297   98.508
 qs_scf_new_mos                     116  7.6    0.001    0.001   93.984   94.525
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   93.983   94.524
 ot_scf_mini                        116  9.6    0.004    0.004   89.191   89.833
 multiply_cannon                   2485 13.5    0.503    0.569   76.967   81.488
 multiply_cannon_loop              2485 14.5    1.566    1.636   73.496   75.985
 ot_mini                            116 10.6    0.001    0.001   50.204   50.763
 mp_waitall_1                    212858 16.6   23.990   39.757   23.990   39.757
 multiply_cannon_multrec          29820 15.5   20.920   26.625   31.441   37.641
 init_scf_loop                       11  6.9    0.000    0.000   33.544   33.546
 rebuild_ks_matrix                  127  8.3    0.001    0.001   32.265   33.079
 qs_ks_build_kohn_sham_matrix       127  9.3    0.034    0.090   32.264   33.078
 qs_ks_update_qs_env                127  7.6    0.001    0.001   28.984   29.740
 multiply_cannon_metrocomm3       29820 15.5    0.097    0.103   15.525   29.439
 prepare_preconditioner              11  7.9    0.000    0.000   29.175   29.259
 make_preconditioner                 11  8.9    0.000    0.000   29.175   29.258
 qs_ot_get_derivative               116 11.6    0.001    0.002   28.375   29.013
 make_full_inverse_cholesky          11  9.9    0.000    0.000   27.812   28.385
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   21.910   23.063
 apply_single                       127 13.6    0.001    0.001   21.910   23.062
 qs_ot_get_p                        127 10.4    0.001    0.001   21.035   21.681
 ot_diis_step                       116 11.6    0.014    0.015   21.651   21.654
 multiply_cannon_sync_h2d         29820 15.5   17.914   21.078   17.914   21.078
 cp_fm_cholesky_invert               11 10.9   17.001   17.014   17.001   17.014
 qs_ot_p2m_diag                      82 11.4    0.186    0.214   16.077   16.111
 make_m2s                          4970 13.5    0.089    0.099   14.340   16.090
 make_images                       4970 14.5    1.155    1.358   14.130   15.879
 cp_dbcsr_syevd                      82 12.4    0.005    0.006   14.887   14.888
 sum_up_and_integrate               127 10.3    0.002    0.004   13.998   14.026
 integrate_v_rspace                 127 11.3    0.003    0.004   13.938   13.968
 qs_rho_update_rho_low              127  7.7    0.001    0.001   13.383   13.412
 calculate_rho_elec                 127  8.7    0.086    0.103   13.382   13.412
 init_scf_run                        11  5.9    0.000    0.001   12.006   12.008
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.006   12.008
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   11.722   11.753
 cp_fm_diag_elpa_base                82 14.4   11.453   11.562   11.715   11.742
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002   11.228   11.696
 multiply_cannon_metrocomm4       27335 15.5    0.105    0.116    3.809   10.728
 dbcsr_mm_accdrv_process          61726 16.2    5.406    6.097    9.971   10.519
 make_images_data                  4970 15.5    0.066    0.075    8.422   10.444
 mp_irecv_dv                      68888 16.3    3.606   10.331    3.606   10.331
 hybrid_alltoall_any               5155 16.4    0.349    1.541    7.104    9.905
 pw_transfer                       1535 11.6    0.084    0.095    8.666    8.730
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    8.444    8.513
 wfi_extrapolate                     11  7.9    0.001    0.001    8.455    8.455
 density_rs2pw                      127  9.7    0.006    0.007    7.093    7.603
 fft_wrap_pw1pw2_140                519 13.2    1.203    1.220    7.449    7.537
 grid_integrate_task_list           127 12.3    7.097    7.425    7.097    7.425
 cp_fm_cholesky_decompose            22 10.9    7.127    7.209    7.127    7.209
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    6.381    7.185
 calculate_dm_sparse                127  9.5    0.001    0.001    6.474    6.617
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.151    6.159
 fft3d_ps                          1281 14.7    2.815    2.992    6.023    6.073
 mp_sum_l                          7884 12.9    4.197    6.069    4.197    6.069
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.256    5.453
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    5.354    5.433
 grid_collocate_task_list           127  9.7    4.964    5.352    4.964    5.352
 mp_allgather_i34                  2485 14.5    1.997    5.111    1.997    5.111
 mp_alltoall_d11v                  2401 14.1    4.329    4.829    4.329    4.829
 potential_pw2rs                    127 12.3    0.016    0.018    4.513    4.525
 dbcsr_complete_redistribute        393 12.7    0.770    0.892    3.256    4.120
 mp_sum_d                          4453 12.1    2.639    3.913    2.639    3.913
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=190.180000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=799.818182, yerr=2.917601
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.928533E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks               3984192       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1695.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             947.240960E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1042912
 MPI messages size (bytes):
  total size                         2.716210E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.604448E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              281856              36943429632
    131072 < size <=  4194304              660064             996105256960
   4194304 < size <= 16777216               65632             931531265168
  16777216 < size                           28672             751619276800
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4002                  58208.
 MP_Allreduce        11082                    999.
 MP_Sync                87
 MP_Alltoall          1712                9388896.
 MP_SendRecv          7936                  75008.
 MP_ISendRecv         7936                  75008.
 MP_Wait             21820
 MP_ISend            11748                 275205.
 MP_IRecv            11748                 275205.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.037    0.066  177.173  177.189
 qs_mol_dyn_low                       1  2.0    0.004    0.008  176.523  176.553
 qs_forces                           11  3.9    0.027    0.099  175.855  175.858
 qs_energies                         11  4.9    0.003    0.013  169.275  169.309
 scf_env_do_scf                      11  5.9    0.002    0.012  153.499  153.500
 scf_env_do_scf_inner_loop          117  6.6    0.006    0.029  118.032  118.033
 velocity_verlet                     10  3.0    0.002    0.002  114.010  114.030
 dbcsr_multiply_generic            2507 12.6    0.181    0.187   82.462   83.685
 qs_scf_new_mos                     117  7.6    0.001    0.001   80.789   81.124
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   80.788   81.123
 ot_scf_mini                        117  9.6    0.003    0.004   76.534   76.931
 multiply_cannon                   2507 13.6    0.511    0.528   62.617   67.566
 multiply_cannon_loop              2507 14.6    1.132    1.186   59.378   62.194
 ot_mini                            117 10.6    0.001    0.001   42.835   43.234
 init_scf_loop                       11  6.9    0.001    0.004   35.352   35.353
 mp_waitall_1                    170520 16.6   24.714   33.791   24.714   33.791
 prepare_preconditioner              11  7.9    0.000    0.000   31.313   31.355
 make_preconditioner                 11  8.9    0.000    0.002   31.313   31.355
 rebuild_ks_matrix                  128  8.3    0.001    0.001   30.338   30.763
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.022   30.337   30.762
 make_full_inverse_cholesky          11  9.9    0.000    0.000   28.943   30.353
 qs_ks_update_qs_env                128  7.6    0.001    0.001   27.337   27.723
 multiply_cannon_multrec          20056 15.6   13.098   16.328   22.421   25.725
 multiply_cannon_metrocomm3       20056 15.6    0.063    0.066   15.325   24.732
 qs_ot_get_derivative               117 11.6    0.002    0.002   23.282   23.675
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   19.637   20.675
 apply_single                       128 13.6    0.001    0.001   19.636   20.675
 qs_ot_get_p                        128 10.4    0.001    0.001   18.975   19.490
 ot_diis_step                       117 11.6    0.018    0.025   19.441   19.442
 make_m2s                          5014 13.6    0.079    0.084   14.696   16.187
 make_images                       5014 14.6    1.145    1.240   14.463   15.951
 multiply_cannon_sync_h2d         20056 15.6   13.644   15.253   13.644   15.253
 cp_fm_cholesky_invert               11 10.9   14.625   14.635   14.625   14.635
 qs_ot_p2m_diag                      83 11.4    0.265    0.272   14.618   14.626
 sum_up_and_integrate               128 10.3    0.002    0.003   13.960   13.986
 integrate_v_rspace                 128 11.3    0.003    0.004   13.900   13.923
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.612   13.645
 calculate_rho_elec                 128  8.7    0.130    0.145   13.611   13.645
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   13.527   13.537
 make_images_data                  5014 15.6    0.063    0.072    8.687   10.661
 init_scf_run                        11  5.9    0.000    0.001   10.565   10.565
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.564   10.565
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   10.320   10.338
 cp_fm_diag_elpa_base                83 14.4    9.895   10.071   10.317   10.334
 hybrid_alltoall_any               5200 16.5    0.449    2.052    7.583    9.947
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.203    9.468
 multiply_cannon_metrocomm4       17549 15.6    0.067    0.077    3.468    9.179
 mp_irecv_dv                      50230 16.2    3.339    8.920    3.339    8.920
 dbcsr_mm_accdrv_process          41502 16.2    5.651    5.939    8.785    8.908
 pw_transfer                       1547 11.6    0.085    0.104    8.755    8.865
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    8.531    8.649
 grid_integrate_task_list           128 12.3    7.300    7.932    7.300    7.932
 fft_wrap_pw1pw2_140                523 13.2    1.303    1.332    7.572    7.705
 wfi_extrapolate                     11  7.9    0.001    0.001    7.507    7.507
 cp_fm_cholesky_decompose            22 10.9    7.468    7.504    7.468    7.504
 cp_fm_upper_to_full                105 14.8    5.796    7.309    5.796    7.309
 density_rs2pw                      128  9.7    0.006    0.006    6.901    7.301
 dbcsr_complete_redistribute        395 12.7    1.168    1.200    4.655    6.425
 fft3d_ps                          1291 14.7    2.763    2.994    5.904    5.995
 calculate_dm_sparse                128  9.5    0.001    0.001    5.881    5.974
 grid_collocate_task_list           128  9.7    5.163    5.634    5.163    5.634
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.487    5.492
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.699    5.400
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.481    5.246
 mp_alltoall_d11v                  2415 14.1    4.470    5.039    4.470    5.039
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.575    4.696
 mp_sum_l                          7950 12.9    3.344    4.675    3.344    4.675
 mp_allgather_i34                  2507 14.6    1.731    4.586    1.731    4.586
 potential_pw2rs                    128 12.3    0.020    0.022    4.356    4.365
 transfer_fm_to_dbcsr                11  9.9    0.019    0.023    2.350    4.080
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    3.927    3.950
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.892    3.917
 mp_alltoall_i22                    716 14.1    1.988    3.890    1.988    3.890
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.505    3.545
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=177.189000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=894.636364, yerr=8.070966
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.353788E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               5977344       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               1.138565E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1143192
 MPI messages size (bytes):
  total size                         2.023815E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.770319E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              319024              36042702848
    131072 < size <=  4194304              715736             785529176064
   4194304 < size <= 16777216               70320             665379241840
  16777216 < size                           30720             536870912000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  58348.
 MP_Allreduce        11057                   1083.
 MP_Sync                87
 MP_Alltoall          1712               12503107.
 MP_SendRecv          5888                  75008.
 MP_ISendRecv         5888                  75008.
 MP_Wait             22442
 MP_ISend            14952                 244818.
 MP_IRecv            14952                 244818.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.034  192.071  192.072
 qs_mol_dyn_low                       1  2.0    0.005    0.032  191.503  191.517
 qs_forces                           11  3.9    0.033    0.034  191.384  191.393
 qs_energies                         11  4.9    0.002    0.004  184.195  184.206
 scf_env_do_scf                      11  5.9    0.001    0.001  167.366  167.380
 velocity_verlet                     10  3.0    0.002    0.002  126.711  126.714
 scf_env_do_scf_inner_loop          117  6.6    0.009    0.019  120.351  120.352
 qs_scf_new_mos                     117  7.6    0.001    0.001   83.725   84.077
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   83.725   84.076
 dbcsr_multiply_generic            2507 12.6    0.189    0.196   81.148   81.908
 ot_scf_mini                        117  9.6    0.003    0.003   79.139   79.509
 multiply_cannon                   2507 13.6    0.554    0.591   56.202   58.753
 multiply_cannon_loop              2507 14.6    1.822    1.892   52.315   54.151
 init_scf_loop                       11  6.9    0.000    0.000   46.887   46.888
 ot_mini                            117 10.6    0.001    0.001   43.516   43.883
 prepare_preconditioner              11  7.9    0.000    0.000   42.770   42.803
 make_preconditioner                 11  8.9    0.000    0.000   42.770   42.803
 make_full_inverse_cholesky          11  9.9    0.011    0.021   36.429   41.383
 multiply_cannon_multrec          30084 15.6   13.391   19.335   26.254   31.857
 rebuild_ks_matrix                  128  8.3    0.001    0.001   29.456   29.772
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.019   29.456   29.771
 mp_waitall_1                    147882 16.7   17.770   28.301   17.770   28.301
 qs_ks_update_qs_env                128  7.6    0.001    0.001   26.576   26.858
 qs_ot_get_derivative               117 11.6    0.002    0.002   23.722   24.089
 qs_ot_get_p                        128 10.4    0.001    0.001   21.657   22.049
 make_m2s                          5014 13.6    0.096    0.101   20.428   21.541
 make_images                       5014 14.6    1.940    2.273   20.118   21.231
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   19.198   19.819
 apply_single                       128 13.6    0.001    0.001   19.198   19.819
 ot_diis_step                       117 11.6    0.017    0.018   19.674   19.675
 qs_ot_p2m_diag                      83 11.4    0.343    0.390   17.338   17.392
 cp_fm_upper_to_full                105 14.8   11.422   16.789   11.422   16.789
 cp_fm_cholesky_invert               11 10.9   16.544   16.553   16.544   16.553
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   15.906   15.907
 multiply_cannon_metrocomm3       30084 15.6    0.050    0.053    6.818   15.334
 sum_up_and_integrate               128 10.3    0.002    0.003   14.155   14.181
 integrate_v_rspace                 128 11.3    0.003    0.005   14.095   14.123
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.986   14.020
 calculate_rho_elec                 128  8.7    0.172    0.188   13.986   14.020
 dbcsr_mm_accdrv_process          62264 16.2    8.473    9.240   12.430   12.914
 dbcsr_complete_redistribute        395 12.7    1.497    1.624    9.168   12.895
 make_images_data                  5014 15.6    0.066    0.073   10.979   12.791
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   12.545   12.555
 cp_fm_diag_elpa_base                83 14.4   11.574   11.893   12.537   12.548
 hybrid_alltoall_any               5200 16.5    0.530    2.203    9.862   11.999
 copy_fm_to_dbcsr                   209 11.7    0.001    0.002    7.734   11.444
 multiply_cannon_sync_h2d         30084 15.6   10.412   11.165   10.412   11.165
 init_scf_run                        11  5.9    0.000    0.001   10.850   10.852
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.850   10.851
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.998   10.266
 transfer_fm_to_dbcsr                11  9.9    0.001    0.006    6.319    9.957
 mp_alltoall_i22                    716 14.1    5.598    9.359    5.598    9.359
 pw_transfer                       1547 11.6    0.085    0.101    9.153    9.238
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    8.927    9.019
 fft_wrap_pw1pw2_140                523 13.2    1.441    1.475    7.911    8.013
 grid_integrate_task_list           128 12.3    7.523    7.860    7.523    7.860
 cp_fm_cholesky_decompose            22 10.9    7.735    7.826    7.735    7.826
 wfi_extrapolate                     11  7.9    0.001    0.001    7.764    7.764
 multiply_cannon_metrocomm4       25070 15.6    0.084    0.094    2.895    7.559
 mp_irecv_dv                      76098 16.2    2.738    7.279    2.738    7.279
 density_rs2pw                      128  9.7    0.006    0.006    6.947    7.150
 calculate_dm_sparse                128  9.5    0.001    0.001    6.300    6.367
 fft3d_ps                          1291 14.7    2.852    2.917    6.108    6.179
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.485    5.527
 grid_collocate_task_list           128  9.7    5.290    5.506    5.290    5.506
 mp_alltoall_d11v                  2415 14.1    5.155    5.449    5.155    5.449
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.547    4.645
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.390    4.511
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    4.469    4.470
 potential_pw2rs                    128 12.3    0.023    0.024    4.360    4.374
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    4.293    4.345
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=192.072000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1074.363636, yerr=15.047034
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430454546432       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1975684956160       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986255912960       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992006770688       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613065416704       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239182565376       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239182565376       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.245913E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.910926E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806580192       0.0%      0.0%    100.0%
 number of processed stacks               1979136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3439.2
 marketing flops                   145.663816E+12
 -------------------------------------------------------------------------------
 # multiplications                           2535
 max memory usage/rank               1.518428E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  243360
 MPI messages size (bytes):
  total size                         1.342108E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.514910E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              115584              60599304192
   4194304 < size <= 16777216              105840             554906419200
  16777216 < size                           20352             726592540656
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         9028                     51.
 MP_Alltoall          9736                 793528.
 MP_ISend            40516                2095952.
 MP_IRecv            40516                2095058.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4041                  57652.
 MP_Allreduce        11179                   1163.
 MP_Sync                88
 MP_Alltoall          1724               18848050.
 MP_SendRecv          3870                 122880.
 MP_ISendRecv         3870                 122880.
 MP_Wait             16244
 MP_ISend            10760                 423501.
 MP_IRecv            10760                 423501.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.015    0.030  174.361  174.361
 qs_mol_dyn_low                       1  2.0    0.003    0.004  173.972  173.985
 qs_forces                           11  3.9    0.004    0.004  173.655  173.657
 qs_energies                         11  4.9    0.002    0.002  166.201  166.208
 scf_env_do_scf                      11  5.9    0.001    0.001  148.870  148.883
 velocity_verlet                     10  3.0    0.002    0.002  114.195  114.213
 scf_env_do_scf_inner_loop          118  6.6    0.003    0.008  112.915  112.916
 qs_scf_new_mos                     118  7.6    0.001    0.001   75.574   75.698
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   75.574   75.697
 dbcsr_multiply_generic            2535 12.6    0.184    0.194   73.610   74.052
 ot_scf_mini                        118  9.6    0.003    0.004   71.091   71.209
 multiply_cannon                   2535 13.6    0.568    0.586   54.251   57.728
 multiply_cannon_loop              2535 14.6    0.817    0.867   51.145   52.124
 ot_mini                            118 10.6    0.001    0.001   38.503   38.621
 init_scf_loop                       11  6.9    0.000    0.000   35.800   35.801
 mp_waitall_1                    127164 16.7   25.961   33.123   25.961   33.123
 prepare_preconditioner              11  7.9    0.000    0.000   31.916   31.955
 make_preconditioner                 11  8.9    0.000    0.000   31.916   31.955
 make_full_inverse_cholesky          11  9.9    0.015    0.027   29.778   30.039
 rebuild_ks_matrix                  129  8.3    0.001    0.001   29.451   29.568
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.018   29.450   29.567
 qs_ks_update_qs_env                129  7.6    0.001    0.001   26.864   26.970
 multiply_cannon_multrec          10140 15.6   10.387   15.069   17.962   23.115
 multiply_cannon_metrocomm3       10140 15.6    0.026    0.027   13.355   21.382
 qs_ot_get_derivative               118 11.6    0.001    0.002   21.091   21.203
 qs_ot_get_p                        129 10.4    0.001    0.001   19.555   19.682
 cp_fm_cholesky_invert               11 10.9   18.263   18.270   18.263   18.270
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   17.421   17.585
 apply_single                       129 13.6    0.001    0.001   17.420   17.584
 ot_diis_step                       118 11.6    0.020    0.021   17.336   17.337
 make_m2s                          5070 13.6    0.067    0.070   15.331   16.251
 make_images                       5070 14.6    2.175    2.606   15.022   15.947
 qs_ot_p2m_diag                      84 11.4    0.502    0.507   15.574   15.590
 sum_up_and_integrate               129 10.3    0.002    0.002   14.311   14.359
 cp_dbcsr_syevd                      84 12.4    0.005    0.006   14.356   14.356
 integrate_v_rspace                 129 11.3    0.004    0.004   14.251   14.300
 qs_rho_update_rho_low              129  7.7    0.001    0.001   14.254   14.290
 calculate_rho_elec                 129  8.7    0.257    0.268   14.253   14.289
 cp_fm_diag_elpa                     84 13.4    0.000    0.000   11.334   11.346
 cp_fm_diag_elpa_base                84 14.4   11.089   11.172   11.329   11.342
 multiply_cannon_sync_h2d         10140 15.6   10.714   11.254   10.714   11.254
 init_scf_run                        11  5.9    0.000    0.001   10.483   10.483
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   10.482   10.483
 make_images_data                  5070 15.6    0.056    0.064    8.889   10.200
 hybrid_alltoall_any               5257 16.5    0.848    3.807    8.627   10.138
 pw_transfer                       1559 11.6    0.086    0.096    9.490    9.518
 fft_wrap_pw1pw2                   1301 12.7    0.010    0.011    9.266    9.302
 qs_ot_get_derivative_diag           78 12.4    0.002    0.003    8.398    8.482
 grid_integrate_task_list           129 12.3    7.788    8.242    7.788    8.242
 fft_wrap_pw1pw2_140                527 13.2    1.821    1.857    8.175    8.218
 cp_fm_cholesky_decompose            22 10.9    7.932    8.028    7.932    8.028
 dbcsr_mm_accdrv_process          20958 16.1    3.311    4.520    7.229    7.907
 multiply_cannon_metrocomm1       10140 15.6    0.030    0.031    4.425    7.701
 wfi_extrapolate                     11  7.9    0.001    0.001    7.478    7.478
 density_rs2pw                      129  9.7    0.006    0.006    6.865    7.199
 calculate_dm_sparse                129  9.5    0.001    0.001    6.144    6.242
 fft3d_ps                          1301 14.7    2.789    2.859    5.963    6.008
 multiply_cannon_metrocomm4        7605 15.6    0.027    0.030    1.886    5.993
 mp_irecv_dv                      29150 15.9    1.846    5.896    1.846    5.896
 grid_collocate_task_list           129  9.7    5.616    5.896    5.616    5.896
 dbcsr_complete_redistribute        397 12.7    2.110    2.166    5.353    5.699
 mp_alltoall_d11v                  2429 14.1    4.879    5.579    4.879    5.579
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    5.247    5.248
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.205    5.224
 mp_allgather_i34                  2535 14.6    1.280    4.929    1.280    4.929
 potential_pw2rs                    129 12.3    0.026    0.027    4.364    4.373
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.076    4.130
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.593    3.902
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.682    3.773
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    3.718    3.762
 copy_fm_to_dbcsr                   210 11.7    0.001    0.002    3.456    3.761
 copy_dbcsr_to_fm                   187 11.8    0.004    0.004    3.537    3.660
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.499    3.508
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=174.361000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1432.909091, yerr=26.441890
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1399806074880       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1917702897664       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1943685697536       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1949313282048       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2694948675584       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4338990710784       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5303210868736       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5347334946816       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6535631601664       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11360521838592       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       14909203279872       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       14909203279872       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19481731219456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.091284E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.518198E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6655257696       0.0%      0.0%    100.0%
 number of processed stacks               1933776       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3441.6
 marketing flops                   142.449033E+12
 -------------------------------------------------------------------------------
 # multiplications                           2469
 max memory usage/rank               3.185193E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   98760
 MPI messages size (bytes):
  total size                         1.118850E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.328979E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               44568              34603008000
   4194304 < size <= 16777216               43616             373376942080
  16777216 < size                            9960             710867104896
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4018                  58907.
 MP_Allreduce        11122                   1503.
 MP_Sync                87
 MP_Alltoall          1688               36934197.
 MP_SendRecv          1764                 218624.
 MP_ISendRecv         1764                 218624.
 MP_Wait              9654
 MP_ISend             6312                1080635.
 MP_IRecv             6312                1080635.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.048    0.084  285.330  285.336
 qs_mol_dyn_low                       1  2.0    0.003    0.004  284.748  284.778
 qs_forces                           11  3.9    0.005    0.005  284.651  284.653
 qs_energies                         11  4.9    0.002    0.002  275.851  275.861
 scf_env_do_scf                      11  5.9    0.001    0.001  253.337  253.349
 velocity_verlet                     10  3.0    0.002    0.002  206.059  206.067
 scf_env_do_scf_inner_loop          115  6.6    0.004    0.008  130.582  130.585
 init_scf_loop                       11  6.9    0.000    0.000  122.479  122.483
 prepare_preconditioner              11  7.9    0.000    0.000  117.657  117.677
 make_preconditioner                 11  8.9    0.000    0.000  117.657  117.677
 make_full_inverse_cholesky          11  9.9    0.038    0.039   94.315  114.827
 qs_scf_new_mos                     115  7.6    0.001    0.001   88.118   88.182
 qs_scf_loop_do_ot                  115  8.6    0.001    0.001   88.118   88.181
 ot_scf_mini                        115  9.6    0.004    0.004   83.350   83.355
 dbcsr_multiply_generic            2469 12.5    0.210    0.222   80.414   80.841
 cp_fm_upper_to_full                105 14.8   52.247   74.582   52.247   74.582
 multiply_cannon                   2469 13.5    0.674    0.725   57.789   59.405
 multiply_cannon_loop              2469 14.5    1.029    1.053   53.823   54.996
 ot_mini                            115 10.6    0.001    0.001   43.103   43.111
 dbcsr_complete_redistribute        395 12.7    3.998    4.085   29.311   41.834
 copy_fm_to_dbcsr                   209 11.7    0.001    0.001   25.796   38.244
 transfer_fm_to_dbcsr                11  9.9    0.030    0.031   23.299   35.558
 mp_alltoall_i22                    712 14.1   21.053   33.612   21.053   33.612
 rebuild_ks_matrix                  126  8.3    0.001    0.001   33.318   33.332
 qs_ks_build_kohn_sham_matrix       126  9.3    0.027    0.028   33.318   33.331
 cp_fm_cholesky_invert               11 10.9   33.046   33.052   33.046   33.052
 mp_waitall_1                    102102 16.8   27.271   31.706   27.271   31.706
 qs_ks_update_qs_env                126  7.6    0.001    0.001   30.868   30.893
 qs_ot_get_p                        126 10.4    0.001    0.001   25.395   25.427
 qs_ot_get_derivative               115 11.6    0.002    0.002   24.147   24.153
 qs_ot_p2m_diag                      83 11.4    0.879    0.884   21.426   21.455
 cp_dbcsr_syevd                      83 12.4    0.006    0.006   19.638   19.641
 make_m2s                          4938 13.5    0.074    0.077   18.164   19.628
 multiply_cannon_metrocomm3        9876 15.5    0.025    0.026   18.361   19.441
 make_images                       4938 14.5    3.012    3.203   17.691   19.157
 ot_diis_step                       115 11.6    0.021    0.021   18.932   18.933
 apply_preconditioner_dbcsr         126 12.6    0.000    0.000   18.312   18.390
 apply_single                       126 13.6    0.001    0.001   18.311   18.390
 multiply_cannon_multrec           9876 15.5   10.124   11.905   17.787   17.871
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   16.457   16.458
 cp_fm_diag_elpa_base                83 14.4   12.078   13.693   16.454   16.454
 qs_rho_update_rho_low              126  7.7    0.001    0.001   16.160   16.175
 calculate_rho_elec                 126  8.7    0.472    0.473   16.160   16.174
 sum_up_and_integrate               126 10.3    0.002    0.002   15.744   15.835
 integrate_v_rspace                 126 11.3    0.004    0.004   15.682   15.774
 multiply_cannon_sync_h2d          9876 15.5   14.083   14.112   14.083   14.112
 hybrid_alltoall_any               5124 16.4    1.293    3.007   10.508   12.810
 make_images_data                  4938 15.5    0.064    0.070   10.206   12.374
 init_scf_run                        11  5.9    0.000    0.001   12.162   12.163
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.162   12.163
 pw_transfer                       1523 11.6    0.093    0.094   11.107   11.112
 fft_wrap_pw1pw2                   1271 12.7    0.011    0.011   10.872   10.877
 fft_wrap_pw1pw2_140                515 13.2    3.009    3.082    9.708    9.713
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.685    9.688
 dbcsr_mm_accdrv_process          20442 16.0    4.260    6.058    7.417    9.269
 wfi_extrapolate                     11  7.9    0.001    0.001    9.049    9.050
 cp_fm_cholesky_decompose            22 10.9    8.833    8.853    8.833    8.853
 grid_integrate_task_list           126 12.3    8.438    8.623    8.438    8.623
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    8.160    8.160
 density_rs2pw                      126  9.7    0.005    0.005    7.318    7.370
 mp_alltoall_d11v                  2399 14.1    6.856    6.976    6.856    6.976
 calculate_dm_sparse                126  9.5    0.001    0.001    6.663    6.741
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.423    6.514
 grid_collocate_task_list           126  9.7    6.319    6.391    6.319    6.391
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    6.174    6.295
 fft3d_ps                          1271 14.7    2.806    2.812    6.183    6.232
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=285.336000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2878.000000, yerr=155.169116
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.260007E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255646.
 MP_Allreduce         3139                   6114.
 MP_Sync                 4
 MP_Alltoall            54
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.040   84.819   84.820
 qs_energies                          1  2.0    0.000    0.000   84.353   84.360
 ls_scf                               1  3.0    0.000    0.000   83.451   83.458
 dbcsr_multiply_generic             111  6.7    0.014    0.015   72.433   72.648
 multiply_cannon                    111  7.7    0.018    0.021   55.790   56.846
 multiply_cannon_loop               111  8.7    0.226    0.240   52.365   53.718
 ls_scf_main                          1  4.0    0.000    0.000   52.193   52.194
 density_matrix_trs4                  2  5.0    0.002    0.003   46.687   46.759
 ls_scf_init_scf                      1  4.0    0.000    0.000   28.219   28.221
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   27.096   27.147
 mp_waitall_1                     11031 10.9   22.499   25.481   22.499   25.481
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   24.944   24.963
 multiply_cannon_multrec           2664  9.7    8.105    8.852   15.462   17.161
 multiply_cannon_sync_h2d          2664  9.7   13.490   15.546   13.490   15.546
 make_m2s                           222  7.7    0.009    0.011   13.018   13.608
 make_images                        222  8.7    0.098    0.107   12.996   13.589
 multiply_cannon_metrocomm1        2664  9.7    0.010    0.011    9.699   12.201
 make_images_data                   222  9.7    0.004    0.005    7.609    8.158
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.522    8.063
 dbcsr_mm_accdrv_process           4760 10.4    0.583    0.697    6.974    7.924
 hybrid_alltoall_any                227 10.6    0.215    1.842    6.507    7.765
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.193    7.073    6.193    7.073
 calculate_norms                   4752  9.8    5.484    6.115    5.484    6.115
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.003    5.094
 mp_sum_l                           887  5.1    3.089    4.699    3.089    4.699
 multiply_cannon_metrocomm4        2442  9.7    0.012    0.015    2.050    3.601
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.345    3.596
 mp_irecv_dv                       6231 10.9    2.033    3.573    2.033    3.573
 make_images_sizes                  222  9.7    0.000    0.000    0.705    3.565
 mp_alltoall_i44                    222 10.7    0.705    3.565    0.705    3.565
 arnoldi_extremal                     4  6.8    0.000    0.000    3.237    3.266
 arnoldi_normal_ev                    4  7.8    0.001    0.002    3.237    3.266
 build_subspace                      16  8.4    0.009    0.012    3.146    3.148
 ls_scf_post                          1  4.0    0.000    0.000    3.039    3.045
 ls_scf_store_result                  1  5.0    0.000    0.000    2.862    2.898
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.352    2.749
 dbcsr_merge_single_wm              555 10.7    0.462    0.593    2.343    2.741
 make_images_pack                   222  9.7    2.206    2.618    2.208    2.620
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.014    2.328    2.577
 dbcsr_sort_data                    658 11.4    2.138    2.478    2.138    2.478
 dbcsr_matrix_vector_mult_local     304 10.0    2.066    2.466    2.068    2.467
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.284    2.367
 buffer_matrices_ensure_size        222  8.7    1.757    2.042    1.757    2.042
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.757    1.759
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.748    1.749
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.002    1.748    1.749
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=84.820000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1131.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.175455E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  10075.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.026    0.044   89.720   89.722
 qs_energies                          1  2.0    0.000    0.000   89.192   89.196
 ls_scf                               1  3.0    0.000    0.000   87.839   87.843
 dbcsr_multiply_generic             111  6.7    0.015    0.016   73.966   74.325
 multiply_cannon                    111  7.7    0.032    0.056   52.724   56.607
 ls_scf_main                          1  4.0    0.000    0.000   54.223   54.227
 multiply_cannon_loop               111  8.7    0.136    0.148   49.890   52.728
 density_matrix_trs4                  2  5.0    0.002    0.003   48.588   48.784
 ls_scf_init_scf                      1  4.0    0.000    0.000   29.901   29.902
 mp_waitall_1                      9105 10.9   20.639   29.132   20.639   29.132
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.686   28.782
 multiply_cannon_multrec           1332  9.7   13.315   17.470   22.571   27.836
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.280   26.290
 multiply_cannon_metrocomm3        1332  9.7    0.007    0.008   11.583   19.667
 make_m2s                           222  7.7    0.006    0.007   14.904   15.489
 make_images                        222  8.7    1.366    1.693   14.873   15.460
 dbcsr_mm_accdrv_process           4041 10.4    0.350    0.507    8.850   10.410
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.369    9.875    8.369    9.875
 make_images_data                   222  9.7    0.004    0.005    8.612    9.529
 hybrid_alltoall_any                227 10.6    0.545    2.604    8.004    9.440
 multiply_cannon_metrocomm4        1221  9.7    0.007    0.009    3.195    7.772
 mp_irecv_dv                       3311 11.0    3.175    7.717    3.175    7.717
 mp_sum_l                           887  5.1    4.869    7.664    4.869    7.664
 calculate_norms                   2376  9.8    6.074    6.813    6.074    6.813
 multiply_cannon_sync_h2d          1332  9.7    4.876    6.517    4.876    6.517
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.692    6.169
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.890    5.078
 arnoldi_extremal                     4  6.8    0.000    0.000    4.630    4.650
 arnoldi_normal_ev                    4  7.8    0.001    0.005    4.630    4.650
 build_subspace                      16  8.4    0.014    0.021    4.375    4.378
 ls_scf_post                          1  4.0    0.000    0.000    3.716    3.719
 ls_scf_store_result                  1  5.0    0.000    0.000    3.400    3.541
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.124    3.353
 dbcsr_matrix_vector_mult_local     304 10.0    2.740    3.217    2.742    3.219
 mp_allgather_i34                   111  8.7    0.817    3.014    0.817    3.014
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.595    2.674
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.094    2.488
 dbcsr_data_new                    4174 10.1    2.114    2.416    2.114    2.416
 make_images_pack                   222  9.7    1.819    2.117    1.821    2.119
 dbcsr_sort_data                    436 11.2    1.832    2.067    1.832    2.067
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.873    1.875
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.860    1.862
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.003    1.860    1.862
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=89.722000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1774.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.902999E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265448.
 MP_Allreduce         3138                  10896.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.045   91.990   91.992
 qs_energies                          1  2.0    0.000    0.000   91.472   91.475
 ls_scf                               1  3.0    0.000    0.000   90.064   90.068
 dbcsr_multiply_generic             111  6.7    0.016    0.016   74.825   75.097
 ls_scf_main                          1  4.0    0.000    0.000   56.281   56.285
 multiply_cannon                    111  7.7    0.032    0.063   51.906   55.992
 multiply_cannon_loop               111  8.7    0.117    0.131   49.108   52.760
 density_matrix_trs4                  2  5.0    0.002    0.003   50.464   50.601
 mp_waitall_1                      7281 11.0   23.508   32.793   23.508   32.793
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.132   30.135
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.939   29.015
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.604   26.614
 multiply_cannon_multrec            888  9.7   12.636   15.168   21.189   24.536
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   10.991   22.373
 make_m2s                           222  7.7    0.006    0.007   16.387   17.196
 make_images                        222  8.7    1.576    1.841   16.350   17.157
 make_images_data                   222  9.7    0.004    0.004    9.567   10.654
 hybrid_alltoall_any                227 10.6    0.641    2.936    9.066   10.288
 dbcsr_mm_accdrv_process           3754 10.4    0.328    0.495    8.072    9.308
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.639    8.814    7.639    8.814
 mp_sum_l                           887  5.1    4.844    8.035    4.844    8.035
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.444    7.287
 mp_irecv_dv                       2335 11.1    2.428    7.232    2.428    7.232
 multiply_cannon_sync_h2d           888  9.7    6.080    7.183    6.080    7.183
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.596    7.000
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.717    6.188
 arnoldi_extremal                     4  6.8    0.000    0.000    5.072    5.085
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.072    5.085
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.659    4.903
 build_subspace                      16  8.4    0.014    0.020    4.769    4.775
 calculate_norms                   1584  9.8    4.343    4.710    4.343    4.710
 mp_allgather_i34                   111  8.7    0.921    3.828    0.921    3.828
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.021    3.431    3.769
 ls_scf_post                          1  4.0    0.000    0.000    3.650    3.654
 dbcsr_matrix_vector_mult_local     304 10.0    3.021    3.603    3.023    3.605
 ls_scf_store_result                  1  5.0    0.000    0.000    3.398    3.472
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.837    2.927
 dbcsr_data_new                    4116  9.9    2.111    2.453    2.111    2.453
 dbcsr_sort_data                    325 11.1    1.914    2.146    1.914    2.146
 make_images_sizes                  222  9.7    0.000    0.000    0.961    2.015
 mp_alltoall_i44                    222 10.7    0.961    2.015    0.961    2.015
 dbcsr_finalize                     304  7.8    0.026    0.032    1.604    1.913
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.882    1.884
 make_images_pack                   222  9.7    1.620    1.868    1.623    1.870
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.864    1.865
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.002    1.864    1.865
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=91.992000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2214.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.393270E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  13030.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.029    0.047   96.541   96.543
 qs_energies                          1  2.0    0.000    0.000   95.846   95.851
 ls_scf                               1  3.0    0.000    0.000   94.192   94.196
 dbcsr_multiply_generic             111  6.7    0.017    0.018   78.062   78.328
 ls_scf_main                          1  4.0    0.000    0.000   58.450   58.451
 multiply_cannon                    111  7.7    0.044    0.121   51.249   55.868
 density_matrix_trs4                  2  5.0    0.002    0.003   52.370   52.479
 multiply_cannon_loop               111  8.7    0.153    0.168   46.215   49.422
 ls_scf_init_scf                      1  4.0    0.000    0.000   32.537   32.539
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   31.321   31.402
 mp_waitall_1                      6369 11.0   22.242   28.895   22.242   28.895
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   28.738   28.755
 multiply_cannon_multrec           1332  9.7   14.200   17.546   22.086   25.363
 make_m2s                           222  7.7    0.006    0.008   21.066   22.497
 make_images                        222  8.7    3.134    3.604   21.016   22.449
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.004    8.864   17.052
 make_images_data                   222  9.7    0.004    0.004   11.740   13.366
 hybrid_alltoall_any                227 10.6    0.795    3.775   11.055   12.809
 dbcsr_mm_accdrv_process           3641 10.4    0.315    0.484    7.524    9.042
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.088    8.546    7.088    8.546
 mp_sum_l                           887  5.1    4.158    7.455    4.158    7.455
 multiply_cannon_sync_h2d          1332  9.7    5.423    6.198    5.423    6.198
 multiply_cannon_metrocomm4        1110  9.7    0.005    0.007    2.092    6.111
 mp_irecv_dv                       3229 10.9    2.067    6.030    2.067    6.030
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.196    5.772
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.466    5.539
 arnoldi_extremal                     4  6.8    0.000    0.000    5.160    5.175
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.160    5.174
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.664    4.842
 build_subspace                      16  8.4    0.014    0.020    4.832    4.839
 calculate_norms                   2376  9.8    4.185    4.544    4.185    4.544
 mp_allgather_i34                   111  8.7    2.144    4.488    2.144    4.488
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.021    3.575    3.864
 dbcsr_matrix_vector_mult_local     304 10.0    3.195    3.691    3.197    3.693
 dbcsr_sort_data                    658 11.4    3.072    3.396    3.072    3.396
 ls_scf_post                          1  4.0    0.000    0.000    3.205    3.209
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.817    3.148
 dbcsr_merge_single_wm              555 10.7    0.536    0.651    2.808    3.140
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.987    3.022
 ls_scf_store_result                  1  5.0    0.000    0.000    2.957    3.010
 dbcsr_data_release               10477 10.7    1.579    2.409    1.579    2.409
 dbcsr_finalize                     304  7.8    0.049    0.061    1.799    1.965
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=96.543000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2794.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.727308E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265536.
 MP_Allreduce         3129                  15263.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.085    0.092   91.411   91.427
 qs_energies                          1  2.0    0.000    0.000   90.639   90.641
 ls_scf                               1  3.0    0.000    0.000   88.703   88.706
 dbcsr_multiply_generic             111  6.7    0.018    0.019   70.297   70.489
 ls_scf_main                          1  4.0    0.000    0.000   56.045   56.045
 multiply_cannon                    111  7.7    0.105    0.172   52.079   55.648
 multiply_cannon_loop               111  8.7    0.088    0.093   49.532   50.697
 density_matrix_trs4                  2  5.0    0.002    0.003   49.138   49.193
 ls_scf_init_scf                      1  4.0    0.000    0.000   29.246   29.247
 mp_waitall_1                      5436 11.0   23.990   28.573   23.990   28.573
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.007   28.045
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   25.916   25.928
 multiply_cannon_multrec            444  9.7   13.718   16.377   20.788   22.862
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   10.303   15.279
 multiply_cannon_metrocomm3         444  9.7    0.001    0.002    6.192   14.754
 make_m2s                           222  7.7    0.005    0.005   13.579   14.484
 make_images                        222  8.7    2.034    2.474   13.512   14.414
 hybrid_alltoall_any                227 10.6    0.800    3.832    8.084    9.726
 make_images_data                   222  9.7    0.003    0.004    8.308    9.593
 multiply_cannon_sync_h2d           444  9.7    6.741    8.394    6.741    8.394
 dbcsr_mm_accdrv_process           3003 10.4    0.352    0.403    6.753    7.873
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.389    7.474    6.389    7.474
 arnoldi_extremal                     4  6.8    0.000    0.000    5.845    5.859
 arnoldi_normal_ev                    4  7.8    0.001    0.004    5.845    5.858
 build_subspace                      16  8.4    0.015    0.020    5.433    5.443
 mp_sum_l                           887  5.1    2.629    4.662    2.629    4.662
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.432    4.590
 dbcsr_matrix_vector_mult           304  9.0    0.011    0.021    4.173    4.377
 dbcsr_matrix_vector_mult_local     304 10.0    3.703    4.170    3.705    4.173
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.550    3.991
 mp_irecv_dv                       1241 11.2    1.536    3.964    1.536    3.964
 calculate_norms                    792  9.8    3.615    3.747    3.615    3.747
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    1.858    3.695
 mp_allgather_i34                   111  8.7    1.106    3.577    1.106    3.577
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.475    3.523
 ls_scf_post                          1  4.0    0.000    0.000    3.413    3.415
 make_images_sizes                  222  9.7    0.000    0.000    0.890    3.347
 mp_alltoall_i44                    222 10.7    0.890    3.346    0.890    3.346
 ls_scf_store_result                  1  5.0    0.000    0.000    3.192    3.220
 dbcsr_finalize                     304  7.8    0.062    0.077    2.196    2.293
 dbcsr_data_new                    4608  9.7    1.790    2.250    1.790    2.250
 dbcsr_merge_all                    275  8.9    0.479    0.523    2.055    2.138
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.007    2.008
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.975    1.976
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.002    1.975    1.976
 qs_energies_init_hamiltonians        1  3.0    0.000    0.001    1.920    1.920
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=91.427000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3756.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.827216E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284089.
 MP_Allreduce         3123                  21388.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.079    0.124  104.951  104.974
 qs_energies                          1  2.0    0.000    0.000  103.610  103.612
 ls_scf                               1  3.0    0.000    0.000  100.655  100.657
 dbcsr_multiply_generic             111  6.7    0.024    0.027   74.379   74.512
 ls_scf_main                          1  4.0    0.000    0.000   63.249   63.249
 density_matrix_trs4                  2  5.0    0.002    0.003   54.283   54.336
 multiply_cannon                    111  7.7    0.124    0.203   48.269   50.331
 multiply_cannon_loop               111  8.7    0.098    0.100   45.279   45.836
 ls_scf_init_scf                      1  4.0    0.000    0.000   33.614   33.614
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   32.101   32.126
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   29.389   29.400
 mp_waitall_1                      4527 11.1   21.453   25.244   21.453   25.244
 make_m2s                           222  7.7    0.005    0.005   22.284   23.394
 make_images                        222  8.7    3.563    3.861   22.176   23.285
 multiply_cannon_multrec            444  9.7   17.761   18.368   22.392   22.986
 hybrid_alltoall_any                227 10.6    1.652    3.619   12.557   15.308
 make_images_data                   222  9.7    0.003    0.004   12.718   14.854
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.151   10.395
 multiply_cannon_sync_h2d           444  9.7    8.790    8.845    8.790    8.845
 arnoldi_extremal                     4  6.8    0.000    0.000    7.418    7.431
 arnoldi_normal_ev                    4  7.8    0.002    0.009    7.418    7.431
 build_subspace                      16  8.4    0.026    0.036    6.870    6.880
 dbcsr_matrix_vector_mult           304  9.0    0.017    0.033    5.494    5.668
 dbcsr_matrix_vector_mult_local     304 10.0    5.057    5.395    5.059    5.398
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    5.087    5.173
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.890    5.141
 dbcsr_mm_accdrv_process           1814 10.4    0.313    0.356    4.441    4.575
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.105    4.246    4.105    4.246
 ls_scf_post                          1  4.0    0.000    0.000    3.792    3.795
 mp_allgather_i34                   111  8.7    1.119    3.534    1.119    3.534
 make_images_sizes                  222  9.7    0.000    0.000    1.455    3.534
 mp_alltoall_i44                    222 10.7    1.455    3.534    1.455    3.534
 ls_scf_store_result                  1  5.0    0.000    0.000    3.495    3.530
 calculate_norms                    792  9.8    3.233    3.274    3.233    3.274
 dbcsr_finalize                     304  7.8    0.082    0.089    3.071    3.175
 dbcsr_merge_all                    275  8.9    0.883    0.919    2.857    2.954
 qs_energies_init_hamiltonians        1  3.0    0.001    0.002    2.923    2.923
 dbcsr_complete_redistribute          5  7.6    1.428    1.471    2.736    2.853
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.398    2.517
 dbcsr_sort_data                    325 11.1    2.436    2.497    2.436    2.497
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.401    2.403
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.335    2.337
 qs_ks_build_kohn_sham_matrix         3  8.3    0.002    0.002    2.335    2.337
 dbcsr_data_new                    6591  9.6    1.890    2.328    1.890    2.328
 dbcsr_new_transposed                 4  7.5    0.241    0.249    2.259    2.267
 dbcsr_frobenius_norm                74  6.6    2.055    2.140    2.201    2.234
 dbcsr_add_d                        103  6.2    0.000    0.000    2.123    2.206
 dbcsr_add_anytype                  103  7.2    0.858    0.890    2.123    2.205
 dbcsr_data_release               12724 10.6    1.973    2.176    1.973    2.176
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=104.974000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=7040.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1e070e574e92b071248faedeb93e25aa28b92159_performance_tests/27/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32        7009386627072       0.0%      0.0%    100.0%
 flops     9 x     9 x    32        7335108845568       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        9866241589248       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        9884108906496       0.0%      0.0%    100.0%
 flops    22 x    22 x    32       13354440523776       0.0%      0.0%    100.0%
 flops    32 x    32 x     9       20607185977344       0.0%      0.0%    100.0%
 flops    32 x    32 x    22       25186560638976       0.0%      0.0%    100.0%
 flops     9 x    32 x    32       28458319085568       0.0%      0.0%    100.0%
 flops    22 x    32 x    32       34782389993472       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       42881542373376       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       55680402235392       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       55680402235392       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       72328573419520       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       383.054662E+12       0.0%      0.0%    100.0%
 flops max/rank                    733.641090E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                        26899403712       0.0%      0.0%    100.0%
 number of processed stacks             118860288       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     226.3
 marketing flops                   780.439111E+12
 -------------------------------------------------------------------------------
 # multiplications                           1445
 max memory usage/rank             592.896000E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged               102097920
 MPI messages size (bytes):
  total size                        37.227590E+12
  min size                           0.000000E+00
  max size                           4.551360E+06
  average size                     364.626312E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              731472                        0
       128 < size <=     8192            11922720              97670922240
      8192 < size <=    32768            24718992             614677610496
     32768 < size <=   131072            20000256            1970081366016
    131072 < size <=  4194304            42515668           24886801223040
   4194304 < size <= 16777216             2208812            9656099886720
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4640                  78072.
 MP_Allreduce        13232                   2081.
 MP_Sync              1064
 MP_Alltoall          2588                4806292.
 MP_SendRecv        168740                  11136.
 MP_ISendRecv        92040                  11136.
 MP_Wait            102830
 MP_comm_split          40
 MP_ISend            26090                  85106.
 MP_IRecv            37890                  59644.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.015    0.043  239.379  239.380
 qs_mol_dyn_low                       1  2.0    0.004    0.030  238.400  238.422
 qs_forces                            5  3.8    0.004    0.005  238.292  238.294
 qs_energies                          5  4.8    0.002    0.004  235.226  235.244
 scf_env_do_scf                       5  5.8    0.000    0.001  220.452  220.456
 scf_env_do_scf_inner_loop          105  6.6    0.002    0.006  192.978  192.980
 qs_scf_new_mos                     105  7.6    0.000    0.001  150.984  151.183
 qs_scf_loop_do_ot                  105  8.6    0.001    0.001  150.984  151.183
 ot_scf_mini                        105  9.6    0.003    0.003  140.997  141.151
 dbcsr_multiply_generic            1445 12.2    0.127    0.136  134.221  134.653
 multiply_cannon                   1445 13.2    0.276    0.291  114.570  116.655
 multiply_cannon_loop              1445 14.2    2.863    3.019  112.706  114.011
 velocity_verlet                      4  3.0    0.006    0.023  108.432  108.433
 ot_mini                            105 10.6    0.001    0.001   60.652   60.768
 qs_ot_get_p                        112 10.4    0.001    0.001   50.270   50.530
 multiply_cannon_multrec          69360 15.2   29.784   34.730   39.720   44.546
 mp_waitall_1                    488190 16.1   35.269   41.979   35.269   41.979
 qs_ot_p2m_diag                      40 11.0    0.020    0.030   38.983   39.085
 qs_ot_get_derivative                55 11.6    0.001    0.001   38.795   38.917
 cp_dbcsr_syevd                      40 12.0    0.002    0.002   35.601   35.602
 multiply_cannon_sync_h2d         69360 15.2   28.970   33.252   28.970   33.252
 multiply_cannon_metrocomm3       69360 15.2    0.202    0.213   25.781   33.204
 cp_fm_syevd                         40 13.0    0.000    0.001   30.259   30.391
 rebuild_ks_matrix                  110  8.4    0.000    0.000   29.832   30.013
 qs_ks_build_kohn_sham_matrix       110  9.4    0.011    0.016   29.832   30.013
 qs_ks_update_qs_env                112  7.6    0.001    0.001   27.434   27.598
 init_scf_loop                        7  6.6    0.000    0.000   27.428   27.429
 cp_fm_redistribute_end              40 14.0   12.708   25.359   12.714   25.361
 cp_fm_syevd_base                    40 14.0   12.638   25.294   12.638   25.294
 apply_preconditioner_dbcsr          62 12.6    0.000    0.000   23.137   23.348
 apply_single                        62 13.6    0.000    0.000   23.137   23.348
 prepare_preconditioner               7  7.6    0.000    0.000   22.513   22.543
 make_preconditioner                  7  8.6    0.000    0.000   22.513   22.543
 ot_new_cg_direction                 55 11.6    0.001    0.001   21.128   21.129
 qs_rho_update_rho_low              110  7.6    0.001    0.001   17.953   18.401
 calculate_rho_elec                 110  8.6    0.030    0.032   17.953   18.401
 make_full_inverse_cholesky           7  9.6    0.000    0.000   15.159   15.221
 qs_ot_get_orbitals                 105 10.6    0.001    0.001   14.784   14.967
 qs_ot_get_derivative_taylor         37 12.8    0.001    0.001   13.887   13.984
 mp_sum_l                          4764 12.2   12.270   13.043   12.270   13.043
 pw_transfer                       1645 12.4    0.079    0.099   12.466   12.705
 fft_wrap_pw1pw2                   1425 13.5    0.012    0.015   12.328   12.572
 density_rs2pw                      110  9.6    0.005    0.007   12.037   12.564
 init_scf_run                         5  5.8    0.000    0.000   12.087   12.088
 scf_env_initial_rho_setup            5  6.8    0.001    0.002   12.087   12.088
 calculate_dm_sparse                110  9.5    0.000    0.001   11.625   11.845
 fft_wrap_pw1pw2_240                915 15.0    1.129    1.224   10.902   11.147
 qs_vxc_create                      110 10.4    0.002    0.004   10.930   10.977
 dbcsr_mm_accdrv_process         154766 15.8    6.231    6.406    9.804   10.594
 cp_fm_cholesky_invert                7 10.6   10.545   10.556   10.545   10.556
 qs_ot_get_derivative_diag           18 12.0    0.000    0.001   10.442   10.507
 check_diag                          80 13.5    8.683    8.936    9.745    9.878
 fft3d_pb                           915 16.0    2.378    2.661    9.140    9.397
 transfer_rs2pw                     445 10.6    0.007    0.008    8.512    9.092
 sum_up_and_integrate                60 10.3    0.001    0.002    8.794    8.805
 integrate_v_rspace                  60 11.3    0.001    0.002    8.777    8.789
 acc_transpose_blocks             69360 15.2    0.356    0.371    7.594    8.213
 xc_rho_set_and_dset_create         110 12.4    0.077    0.097    7.607    7.883
 multiply_cannon_metrocomm1       69360 15.2    0.096    0.103    4.668    7.640
 cp_dbcsr_sm_fm_multiply             15  9.3    0.001    0.003    7.617    7.635
 make_m2s                          2890 13.2    0.079    0.088    6.913    7.542
 make_images                       2890 14.2    0.241    0.261    6.805    7.436
 xc_vxc_pw_create                    60 11.3    0.039    0.049    7.323    7.370
 calculate_first_density_matrix       1  7.0    0.000    0.002    7.185    7.202
 cp_dbcsr_sm_fm_multiply_core        15 10.3    0.000    0.000    7.129    7.183
 make_full_single_inverse             7  9.6    0.001    0.001    7.025    7.056
 xc_pw_derive                       510 13.4    0.005    0.006    6.632    6.715
 mp_alltoall_z22v                  2340 17.7    6.224    6.502    6.224    6.502
 acc_transpose_blocks_kernels     69360 16.2    0.848    0.895    4.810    5.330
 mp_waitany                        7680 13.5    4.499    5.112    4.499    5.112
 potential_pw2rs                     60 12.3    0.003    0.003    4.953    4.977
 multiply_cannon_metrocomm4       67915 15.2    0.184    0.200    2.024    4.907
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="601", plot="h2o_512_md", label="(64n/12r/1t)", y=239.380000, yerr=0.000000
PlotPoint: name="602", plot="h2o_512_md_mem", label="(64n/12r/1t)", y=562.600000, yerr=3.322650
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: 1e070e574e92b071248faedeb93e25aa28b92159
Summary: empty
Status: OK