=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: 186428f4e848f883d21da2d2b78d036968ebedff


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1, Cray-FFTW 3.3.8.10,
#              COSMA 2.6.6, ELPA 2022.11.001, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.2.0, LIBVORI 220621, LIBXSMM 1.17, PLUMED 2.8.2,
#              SIRIUS 7.4.3, SPGLIB 1.16.2
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed as default.
#        Replace or adapt the "module add" commands below if needed.
#
# Author: Matthias Krack (31.05.2023)
#
# Layout note: the section below is read by both bash and GNU make.
#   - For make, the "# \" line opens a comment which every trailing backslash
#     extends up to and including the "return" line, so make skips the section.
#   - For bash, "# \" is an ordinary one-line comment; the lines after it run as
#     shell commands when this file is sourced, and "return" stops the sourcing
#     before the make variable definitions are reached.
#   Keep every trailing backslash intact and do not insert a line without one.
#
# Optional argument when sourcing: the gcc module version (default: 9.3.0).
# The shell variable maxtasks is handed to the toolchain installer as its -j
# value; it is not set anywhere in this file.
#
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   cd tools/toolchain; \
   ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j${maxtasks} --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed; \
   cd ../..; \
   printf "Sourcing ${PWD}/tools/toolchain/install/setup ... "; \
   source ${PWD}/tools/toolchain/install/setup; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   echo; \
   return

# Build switches: DO_CHECKS and USE_ACC are compared against "yes" below.
DO_CHECKS := no
USE_ACC := yes

# Optional packages: a non-empty USE_* value enables the package and names the
# version used to locate it under the toolchain install directory.
USE_COSMA := 2.6.6
USE_ELPA := 2022.11.001
USE_LIBINT := 2.6.0
USE_LIBPEXSI := 1.2.0
USE_LIBVORI := 220621
USE_LIBXC := 6.2.0
USE_LIBXSMM := 1.17
USE_PLUMED := 2.8.2
# QUIP stays disabled unless the next line is uncommented
#USE_QUIP := 0.9.10
USE_SIRIUS := 7.4.3
USE_SPGLIB := 1.16.2

# Versions of the packages that are only needed for SIRIUS
LIBVDWXC_VER := 0.4.0
SPFFT_VER := 1.0.6
SPLA_VER := 1.5.5
HDF5_VER := 1.12.0

# Versions of the packages that are only needed for LIBPEXSI
SCOTCH_VER := 6.0.0
SUPERLU_VER := 6.1.0

# LMAX selects the libint build directory, MAX_CONTR ends up in -D__MAX_CONTR
LMAX := 5
MAX_CONTR := 4

# GPU model and offload backend
GPUVER := P100
OFFLOAD_TARGET := cuda

# Compiler drivers, linker and archiver
CC := cc
CXX := CC
FC := ftn
LD := ftn
OFFLOAD_CC := nvcc
AR := ar -r

# No -march flag is given here: cc, CC, and ftn already include the proper one
CFLAGS := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g

# Preprocessor defines shared by every build of this arch file
DFLAGS := -D__parallel -D__SCALAPACK -D__FFTW3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# Root of the toolchain installation. PWD is not assigned in this file
# (presumably inherited from the environment of the make invocation).
INSTALL_PATH := $(PWD)/tools/toolchain/install

# Check builds additionally define __CHECK_DIAG
ifeq ($(DO_CHECKS), yes)
  DFLAGS += -D__CHECK_DIAG
endif

# Accelerated builds: DBCSR acceleration and CUDA offload are switched on.
# __NO_OFFLOAD_PW is added as well because there is possibly no performance
# gain with PW_CUDA currently.
ifeq ($(USE_ACC), yes)
  DFLAGS += -D__DBCSR_ACC -D__OFFLOAD_CUDA -D__NO_OFFLOAD_PW
endif

# PLUMED: adds the __PLUMED2 define and the static library.
# Enabling it also sets USE_GSL to 2.7.
ifneq ($(USE_PLUMED),)
  USE_PLUMED := $(strip $(USE_PLUMED))
  USE_GSL := 2.7
  PLUMED_LIB := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
  LIBS += $(PLUMED_LIB)/libplumed.a
  DFLAGS += -D__PLUMED2
endif

# ELPA: include paths, define and static library.
# TARGET names the sub-directory of the ELPA installation that is used. It now
# follows USE_ACC, in the same way the SIRIUS and COSMA sections of this file
# pick their CUDA variants: the "nvidia" build (plus -D__ELPA_NVIDIA_GPU) for
# accelerated builds, the "cpu" build otherwise. Previously TARGET was fixed
# to "nvidia", so a build with USE_ACC other than "yes" still selected the GPU
# variant of ELPA.
# NOTE(review): assumes the toolchain installs a "cpu" build next to the
# "nvidia" one - confirm against the toolchain's ELPA install script.
ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
   ifeq ($(USE_ACC), yes)
      TARGET         := nvidia
   else
      TARGET         := cpu
   endif
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   DFLAGS         += -D__ELPA
   ifeq ($(TARGET), nvidia)
      DFLAGS         += -D__ELPA_NVIDIA_GPU
   endif
   LIBS           += $(ELPA_LIB)/libelpa.a
endif

# QUIP: include path, define and the static libraries in link order
# (only active when USE_QUIP is set)
ifneq ($(USE_QUIP),)
  USE_QUIP := $(strip $(USE_QUIP))
  QUIP_INC := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
  QUIP_LIB := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
  DFLAGS += -D__QUIP
  CFLAGS += -I$(QUIP_INC)
  LIBS += $(addprefix $(QUIP_LIB)/,libquip_core.a libatoms.a libFoX_sax.a libFoX_common.a libFoX_utils.a libFoX_fsys.a)
endif

# LIBPEXSI together with the SuperLU_DIST and SCOTCH libraries linked after it
ifneq ($(USE_LIBPEXSI),)
  # Normalise the version strings used in the directory names
  USE_LIBPEXSI := $(strip $(USE_LIBPEXSI))
  SCOTCH_VER := $(strip $(SCOTCH_VER))
  SUPERLU_VER := $(strip $(SUPERLU_VER))

  # Include and library directories of the three packages
  LIBPEXSI_INC := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
  SCOTCH_INC := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
  SUPERLU_INC := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
  LIBPEXSI_LIB := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
  SCOTCH_LIB := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib
  SUPERLU_LIB := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib

  DFLAGS += -D__LIBPEXSI
  CFLAGS += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC)

  # Link order: PEXSI, then SuperLU_DIST, then the SCOTCH libraries
  LIBS += $(LIBPEXSI_LIB)/libpexsi.a $(SUPERLU_LIB)/libsuperlu_dist.a
  LIBS += $(addprefix $(SCOTCH_LIB)/,libptscotchparmetis.a libptscotch.a libptscotcherr.a libscotchmetis.a libscotch.a)
endif

# libvori: define and static library (no include path is added)
ifneq ($(USE_LIBVORI),)
  USE_LIBVORI := $(strip $(USE_LIBVORI))
  DFLAGS += -D__LIBVORI
  LIBVORI_LIB := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
  LIBS += $(LIBVORI_LIB)/libvori.a
endif

# libxc: include path, define, and the libxcf03 library followed by libxc
ifneq ($(USE_LIBXC),)
  USE_LIBXC := $(strip $(USE_LIBXC))
  LIBXC_LIB := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
  LIBXC_INC := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
  DFLAGS += -D__LIBXC
  CFLAGS += -I$(LIBXC_INC)
  LIBS += $(LIBXC_LIB)/libxcf03.a $(LIBXC_LIB)/libxc.a
endif

# libint: the installation directory name carries both the version and LMAX
ifneq ($(USE_LIBINT),)
  LMAX := $(strip $(LMAX))
  USE_LIBINT := $(strip $(USE_LIBINT))
  LIBINT_LIB := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
  LIBINT_INC := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
  DFLAGS += -D__LIBINT
  CFLAGS += -I$(LIBINT_INC)
  LIBS += $(LIBINT_LIB)/libint2.a
endif

# spglib: include path, define and the libsymspg static library
ifneq ($(USE_SPGLIB),)
  USE_SPGLIB := $(strip $(USE_SPGLIB))
  SPGLIB_LIB := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
  SPGLIB_INC := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
  DFLAGS += -D__SPGLIB
  CFLAGS += -I$(SPGLIB_INC)
  LIBS += $(SPGLIB_LIB)/libsymspg.a
endif

# libxsmm: include path, define, and libxsmmf followed by libxsmm
ifneq ($(USE_LIBXSMM),)
  USE_LIBXSMM := $(strip $(USE_LIBXSMM))
  LIBXSMM_LIB := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
  LIBXSMM_INC := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
  DFLAGS += -D__LIBXSMM
  CFLAGS += -I$(LIBXSMM_INC)
  LIBS += $(LIBXSMM_LIB)/libxsmmf.a $(LIBXSMM_LIB)/libxsmm.a
endif

# SIRIUS together with the packages configured alongside it in this section
# (HDF5, libvdwxc, SpFFT, SpLA)
ifneq ($(USE_SIRIUS),)
  # Normalise the version strings used in the directory names
  USE_SIRIUS := $(strip $(USE_SIRIUS))
  HDF5_VER := $(strip $(HDF5_VER))
  LIBVDWXC_VER := $(strip $(LIBVDWXC_VER))
  SPFFT_VER := $(strip $(SPFFT_VER))
  SPLA_VER := $(strip $(SPLA_VER))

  # Directories that are the same for accelerated and plain builds
  HDF5_LIB := $(INSTALL_PATH)/hdf5-$(HDF5_VER)/lib
  LIBVDWXC_INC := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
  LIBVDWXC_LIB := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
  SPFFT_INC := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
  SPLA_INC := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla

  # Accelerated builds take SpFFT, SpLA and SIRIUS from their "cuda"
  # sub-directories and additionally define __OFFLOAD_GEMM
  ifeq ($(USE_ACC), yes)
    DFLAGS += -D__OFFLOAD_GEMM
    SIRIUS_INC := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/cuda
    SIRIUS_LIB := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib/cuda
    SPLA_LIB := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib/cuda
    SPFFT_LIB := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib/cuda
  else
    SIRIUS_INC := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
    SIRIUS_LIB := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
    SPLA_LIB := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
    SPFFT_LIB := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
  endif

  CFLAGS += -I$(LIBVDWXC_INC) -I$(SPFFT_INC) -I$(SPLA_INC) -I$(SIRIUS_INC)
  DFLAGS += -D__HDF5 -D__LIBVDWXC -D__SPFFT -D__SPLA -D__SIRIUS

  # Link order: SIRIUS first, then SpLA, SpFFT, libvdwxc and HDF5
  LIBS += $(SIRIUS_LIB)/libsirius.a $(SPLA_LIB)/libspla.a $(SPFFT_LIB)/libspfft.a
  LIBS += $(LIBVDWXC_LIB)/libvdwxc.a $(HDF5_LIB)/libhdf5.a
endif

# COSMA: accelerated builds use the installation whose directory name ends
# in "-cuda"; the four static libraries are linked in the order listed
ifneq ($(USE_COSMA),)
  USE_COSMA := $(strip $(USE_COSMA))
  ifeq ($(USE_ACC), yes)
    USE_COSMA := $(USE_COSMA)-cuda
  endif
  COSMA_LIB := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
  COSMA_INC := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
  DFLAGS += -D__COSMA
  CFLAGS += -I$(COSMA_INC)
  LIBS += $(addprefix $(COSMA_LIB)/,libcosma_prefixed_pxgemm.a libcosma.a libcosta.a libTiled-MM.a)
endif

# GSL: include path, define and static library
# (USE_GSL is set by the PLUMED section of this file)
ifneq ($(USE_GSL),)
  USE_GSL := $(strip $(USE_GSL))
  GSL_LIB := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
  GSL_INC := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
  DFLAGS += -D__GSL
  CFLAGS += -I$(GSL_INC)
  LIBS += $(GSL_LIB)/libgsl.a
endif

# All preprocessor defines collected so far become part of the C flags
CFLAGS += $(DFLAGS)

# C++ and device compiler flags
CXXFLAGS := $(CFLAGS) -std=c++11
OFFLOAD_FLAGS := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# Fortran flags start from the C flags. -fallow-argument-mismatch is added
# only when the major version reported by gcc is greater than 9.
FCFLAGS := $(CFLAGS)
ifeq ($(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes), yes)
  FCFLAGS += -fallow-argument-mismatch
endif
FCFLAGS += -fbacktrace -ffree-form -ffree-line-length-none -fno-omit-frame-pointer -std=f2008

# The linker reuses the Fortran flags; when CUDA_HOME is set, its lib64
# directory is added as library search path and as rpath
LDFLAGS := $(FCFLAGS)
ifneq ($(CUDA_HOME),)
  CUDA_LIB := $(CUDA_HOME)/lib64
  LDFLAGS += -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA libraries first, then the remaining system libraries
LIBS += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt
LIBS += -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/01
 job id: 47197344
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/02
 job id: 47197345
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/03
 job id: 47197346
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/04
 job id: 47197347
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/05
 job id: 47197348
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/06
 job id: 47197349
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/07
 job id: 47197350
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/08
 job id: 47197351
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/09
 job id: 47197352
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/10
 job id: 47197353
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/11
 job id: 47197354
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/12
 job id: 47197355
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/13
 job id: 47197356
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/14
 job id: 47197358
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/15
 job id: 47197359
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/16
 job id: 47197360
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/17
 job id: 47197362
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/18
 job id: 47197363
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/19
 job id: 47197364
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/20
 job id: 47197366
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/21
 job id: 47197367
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/22
 job id: 47197368
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/23
 job id: 47197369
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/24
 job id: 47197370
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/25
 job id: 47197371
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/26
 job id: 47197372
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: 512 H2O (4 NVE MD steps on 64 nodes)
 input file: benchmarks/QS/00512_H2O/H2O-512_md.inp
 required files: []
 output file: result.log
 # nodes = 64
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/27
 job id: 47197374
 --- Point ---
 name: 601
 plot: h2o_512_md
 regex: CP2K  
 label: (64n/12r/1t)
 --- Point ---
 name: 602
 plot: h2o_512_md_mem
 regex: Estimated peak process memory 
 label: (64n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md_mem", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          424                      8.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.027    0.041  146.545  146.546
 farming_run                          1  2.0  145.927  145.929  146.488  146.491
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.464390E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              230                1103589.
 MP_Allreduce          571                1938539.
 MP_Sync                25
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split          10
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.010    0.029  120.072  120.073
 qs_energies                          1  2.0    0.000    0.000  119.753  119.757
 mp2_main                             1  3.0    0.000    0.000  115.819  115.822
 mp2_gpw_main                         1  4.0    0.019    0.025  113.857  113.861
 mp2_ri_gpw_compute_in                1  5.0    0.172    0.175   93.025   93.442
 mp2_ri_gpw_compute_in_loop           1  6.0    0.004    0.005   54.972   55.389
 mp2_eri_3c_integrate_gpw           272  7.0    0.153    0.168   41.415   46.629
 get_2c_integrals                     1  6.0    0.008    0.009   37.266   37.881
 integrate_v_rspace                 273  8.0    0.432    0.444   24.821   29.885
 pw_transfer                       6555 10.6    0.376    0.384   27.440   28.004
 fft_wrap_pw1pw2                   5465 11.4    0.045    0.047   26.128   26.710
 grid_integrate_task_list           273  9.0   20.599   26.169   20.599   26.169
 fft_wrap_pw1pw2_100               2178 12.4    1.156    1.209   23.600   24.188
 rpa_ri_compute_en                    1  5.0    0.020    0.024   20.717   20.976
 compute_2c_integrals                 1  7.0    0.002    0.002   19.672   19.673
 cp_fm_cholesky_decompose            12  8.2   18.272   19.402   18.272   19.402
 compute_2c_integrals_loop_lm         1  8.0    0.003    0.004   18.802   18.908
 mp2_eri_2c_integrate_gpw             1  9.0    2.391    2.437   18.800   18.907
 cholesky_decomp                      1  7.0    0.000    0.000   16.442   17.041
 fft3d_s                           5443 13.4   16.176   16.645   16.198   16.666
 ao_to_mo_and_store_B_mult_1        272  7.0   10.724   15.305   10.724   15.305
 calculate_wavefunction             272  8.0    5.409    5.548   12.547   13.151
 rpa_num_int                          1  6.0    0.000    0.000   11.849   11.849
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   11.286   11.847
 calc_mat_Q                           8  8.0    0.000    0.000    9.506    9.623
 contract_S_to_Q                      8  9.0    0.000    0.000    8.933    9.053
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.531    8.645
 parallel_gemm_fm_cosma              14 10.1    8.531    8.645    8.531    8.645
 calc_potential_gpw                 544  9.5    0.005    0.005    8.217    8.524
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.002    8.159    8.378
 create_integ_mat                     1  6.0    0.022    0.028    8.353    8.354
 potential_pw2rs                    545 10.0    0.107    0.109    7.704    8.325
 collocate_single_gaussian          272 10.0    0.039    0.041    7.440    7.652
 array2fm                             1  7.0    0.000    0.000    6.693    7.313
 pw_scatter_s                      2720 13.7    4.450    4.599    4.450    4.599
 pw_gather_s                       2722 13.2    3.889    4.306    3.889    4.306
 scf_env_do_scf                       1  3.0    0.000    0.000    3.569    3.569
 init_scf_loop                        1  4.0    0.000    0.000    3.373    3.373
 cp_fm_syevd                          4  7.5    0.000    0.000    3.228    3.233
 array2fm_buffer_send                 1  8.0    2.904    3.137    2.904    3.137
 prepare_preconditioner               1  5.0    0.000    0.000    3.056    3.057
 make_preconditioner                  1  6.0    0.000    0.000    2.980    2.981
 cp_fm_redistribute_end               4  8.5    1.474    2.943    1.475    2.944
 cp_fm_syevd_base                     4  8.5    1.466    2.933    1.466    2.933
 make_full_all                        1  7.0    0.000    0.000    2.899    2.901
 mp_sync                             25  8.8    1.180    2.448    1.180    2.448
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=113.861029, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2801.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          424                      9.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.034    0.042  408.396  408.398
 farming_run                          1  2.0  407.381  407.392  408.307  408.311
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827141120       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788822       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.224126E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              743                 386399.
 MP_Allreduce         2021                  21391.
 MP_Sync                37
 MP_Alltoall            77               13520837.
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.025    0.046  211.254  211.255
 qs_energies                          1  2.0    0.000    0.001  210.966  210.987
 scf_env_do_scf                       1  3.0    0.000    0.000  106.984  106.984
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  105.402  105.411
 rebuild_ks_matrix                    4  6.0    0.000    0.000  105.400  105.409
 qs_ks_build_kohn_sham_matrix         4  7.0    0.065    0.075  105.400  105.409
 hfx_ks_matrix                        4  8.0    0.001    0.001  104.844  104.849
 integrate_four_center                4  9.0    0.143    0.447  104.843  104.848
 mp2_main                             1  3.0    0.001    0.016  103.635  103.655
 mp2_gpw_main                         1  4.0    0.032    0.047  101.876  101.897
 integrate_four_center_main           4 10.0    0.093    0.481   96.724   99.231
 integrate_four_center_bin          267 11.0   96.632   98.749   96.632   98.749
 init_scf_loop                        1  4.0    0.000    0.000   92.419   92.419
 mp2_ri_gpw_compute_in                1  5.0    0.065    0.072   74.587   75.674
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.003   53.940   55.025
 mp2_eri_3c_integrate_gpw            91  7.0    0.143    0.161   41.709   46.832
 integrate_v_rspace                  95  8.0    0.396    0.572   28.243   33.145
 pw_transfer                       2240 10.6    0.144    0.160   30.004   30.444
 fft_wrap_pw1pw2                   1868 11.4    0.018    0.020   29.008   29.482
 mp2_ri_gpw_compute_en                1  5.0    0.057    0.081   27.114   28.793
 grid_integrate_task_list            95  9.0   23.379   28.446   23.379   28.446
 ao_to_mo_and_store_B_mult_1         91  7.0   10.556   28.292   10.556   28.292
 fft_wrap_pw1pw2_100                730 12.4    1.296    1.441   26.632   27.082
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.834    1.884   25.384   25.394
 get_2c_integrals                     1  6.0    0.000    0.000   20.394   20.582
 compute_2c_integrals                 1  7.0    0.002    0.003   19.342   19.348
 compute_2c_integrals_loop_lm         1  8.0    0.001    0.002   18.912   19.171
 mp2_eri_2c_integrate_gpw             1  9.0    1.733    1.864   18.911   19.170
 fft3d_s                           1823 13.4   18.417   18.732   18.430   18.746
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.000   14.560   14.560
 calculate_wavefunction              91  8.0    2.038    2.069    9.750    9.965
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.556    0.594    8.852    9.356
 potential_pw2rs                    186 10.0    0.034    0.035    8.794    9.326
 mp2_ri_gpw_compute_en_comm          22  7.0    0.511    0.532    8.306    8.905
 local_gemm                         172  8.0    8.296    8.777    8.296    8.777
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.001    8.269    8.643
 collocate_single_gaussian           91 10.0    0.017    0.021    7.909    8.175
 calc_potential_gpw                 182  9.5    0.002    0.002    7.920    8.095
 mp_sendrecv_dm3                   2068  8.0    6.307    6.885    6.307    6.885
 mp_sync                             37 10.5    2.849    6.654    2.849    6.654
 mp2_ri_gpw_compute_en_ener         172  7.0    6.348    6.407    6.348    6.407
 pw_gather_s                        912 13.2    4.886    5.338    4.886    5.338
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=101.874908, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1511.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             451.796992E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62385.
 MP_Allreduce        10329                    270.
 MP_Sync               530
 MP_Alltoall          2083                 572054.
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.021    0.044   67.860   67.861
 qs_mol_dyn_low                       1  2.0    0.003    0.005   67.512   67.520
 qs_forces                           11  3.9    0.010    0.048   67.441   67.444
 qs_energies                         11  4.9    0.003    0.011   65.521   65.540
 scf_env_do_scf                      11  5.9    0.001    0.036   58.190   58.190
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   54.619   54.621
 qs_scf_new_mos                     108  7.5    0.000    0.001   39.261   39.589
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   39.260   39.589
 dbcsr_multiply_generic            2286 12.5    0.095    0.099   37.415   37.883
 ot_scf_mini                        108  9.5    0.002    0.002   37.459   37.642
 velocity_verlet                     10  3.0    0.001    0.001   33.225   33.226
 multiply_cannon                   2286 13.5    0.187    0.200   27.313   28.754
 multiply_cannon_loop              2286 14.5    1.490    1.585   25.812   27.340
 ot_mini                            108 10.5    0.001    0.001   22.070   22.327
 qs_ot_get_derivative               108 11.5    0.001    0.001   18.903   19.084
 mp_waitall_1                    245248 16.5   10.552   16.426   10.552   16.426
 multiply_cannon_metrocomm3       54864 15.5    0.068    0.073    6.139   13.129
 multiply_cannon_multrec          54864 15.5    4.141    6.371    7.751   11.219
 rebuild_ks_matrix                  119  8.3    0.000    0.000   10.804   10.950
 qs_ks_build_kohn_sham_matrix       119  9.3    0.010    0.011   10.803   10.949
 qs_ot_get_p                        119 10.4    0.001    0.001   10.155   10.431
 qs_ks_update_qs_env                119  7.6    0.001    0.001    9.537    9.670
 mp_sum_l                          7287 12.8    5.823    7.586    5.823    7.586
 qs_rho_update_rho_low              119  7.7    0.001    0.001    7.380    7.515
 calculate_rho_elec                 119  8.7    0.012    0.017    7.380    7.514
 multiply_cannon_sync_h2d         54864 15.5    5.718    7.047    5.718    7.047
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    6.566    7.018
 sum_up_and_integrate               119 10.3    0.012    0.014    6.750    6.761
 integrate_v_rspace                 119 11.3    0.002    0.003    6.738    6.750
 qs_ot_p2m_diag                      50 11.0    0.004    0.007    6.405    6.445
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    6.307    6.409
 init_scf_run                        11  5.9    0.000    0.001    5.756    5.757
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    5.756    5.756
 rs_pw_transfer                     974 11.9    0.011    0.012    5.305    5.508
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    5.482    5.484
 density_rs2pw                      119  9.7    0.004    0.005    4.939    5.082
 dbcsr_mm_accdrv_process          76910 16.1    1.191    1.894    3.532    4.943
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    4.852    4.857
 cp_fm_redistribute_end              50 14.0    2.481    4.772    2.495    4.782
 cp_fm_diag_elpa_base                50 14.0    2.269    4.561    2.275    4.572
 pw_transfer                       1439 11.6    0.053    0.058    4.373    4.543
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.008    4.296    4.470
 make_m2s                          4572 13.5    0.055    0.056    3.976    4.169
 fft3d_ps                          1201 14.6    0.378    0.499    3.947    4.127
 make_images                       4572 14.5    0.134    0.140    3.890    4.079
 potential_pw2rs                    119 12.3    0.004    0.004    3.839    3.926
 multiply_cannon_metrocomm1       54864 15.5    0.055    0.060    2.128    3.854
 fft_wrap_pw1pw2_140                487 13.2    0.176    0.196    3.308    3.481
 init_scf_loop                       11  6.9    0.000    0.000    3.472    3.474
 calculate_dm_sparse                119  9.5    0.000    0.001    3.228    3.395
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.064    3.262
 apply_single                       119 13.6    0.000    0.000    3.063    3.262
 jit_kernel_multiply                 13 15.8    2.279    2.960    2.279    2.960
 mp_alltoall_d11v                  2130 13.8    2.703    2.893    2.703    2.893
 ot_diis_step                       108 11.5    0.006    0.006    2.887    2.888
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.819    2.876
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.853    2.859
 mp_alltoall_z22v                  1201 16.6    2.660    2.792    2.660    2.792
 wfi_extrapolate                     11  7.9    0.001    0.001    2.762    2.762
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.752    2.755
 make_images_sizes                 4572 15.5    0.004    0.004    2.179    2.656
 mp_alltoall_i44                   4572 16.5    2.175    2.652    2.175    2.652
 acc_transpose_blocks             54864 15.5    0.226    0.248    1.843    2.338
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.196    2.249
 mp_sum_d                          4135 12.0    1.530    2.131    1.530    2.131
 grid_integrate_task_list           119 12.3    1.992    2.120    1.992    2.120
 prepare_preconditioner              11  7.9    0.000    0.000    2.032    2.061
 make_preconditioner                 11  8.9    0.000    0.000    2.032    2.060
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.915    1.955
 make_images_data                  4572 15.5    0.039    0.043    1.103    1.770
 mp_waitany                       12084 13.8    1.644    1.766    1.644    1.766
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.670    1.694
 yz_to_x                            249 15.8    0.020    0.029    1.569    1.694
 mp_allgather_i34                  2286 14.5    1.049    1.643    1.049    1.643
 mp_sendrecv_dv                   22610 12.7    1.341    1.443    1.341    1.443
 grid_collocate_task_list           119  9.7    1.357    1.439    1.357    1.439
 hybrid_alltoall_any               4725 16.4    0.041    0.114    0.934    1.418
 arnoldi_extremal                   119 11.4    0.001    0.001    1.260    1.380
 arnoldi_normal_ev                  119 12.4    0.002    0.002    1.259    1.378
 dbcsr_complete_redistribute        329 12.2    0.049    0.082    1.332    1.375
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=67.861000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=430.181818, yerr=1.113404
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             489.406464E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62664.
 MP_Allreduce        10306                    303.
 MP_Sync                54
 MP_Alltoall          2060                1492288.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.032   41.275   41.276
 qs_mol_dyn_low                       1  2.0    0.003    0.003   41.073   41.081
 qs_forces                           11  3.9    0.003    0.003   41.006   41.007
 qs_energies                         11  4.9    0.001    0.002   39.245   39.251
 scf_env_do_scf                      11  5.9    0.001    0.002   33.785   33.786
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   31.065   31.066
 dbcsr_multiply_generic            2286 12.5    0.101    0.103   22.117   22.514
 qs_scf_new_mos                     108  7.5    0.001    0.001   20.979   21.214
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   20.979   21.214
 ot_scf_mini                        108  9.5    0.002    0.003   20.046   20.213
 velocity_verlet                     10  3.0    0.001    0.002   19.655   19.656
 multiply_cannon                   2286 13.5    0.208    0.215   16.658   18.296
 multiply_cannon_loop              2286 14.5    0.910    0.980   15.423   16.922
 ot_mini                            108 10.5    0.001    0.001   12.235   12.469
 mp_waitall_1                    200699 16.5    6.132   11.326    6.132   11.326
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.712    9.886
 multiply_cannon_metrocomm3       27432 15.5    0.067    0.070    4.202    9.433
 multiply_cannon_multrec          27432 15.5    1.990    4.395    6.008    8.704
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.711    7.844
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    7.711    7.843
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.817    6.937
 dbcsr_mm_accdrv_process          47894 16.0    3.063    5.272    3.948    5.810
 qs_ot_get_p                        119 10.4    0.001    0.001    4.797    5.022
 sum_up_and_integrate               119 10.3    0.025    0.029    4.652    4.658
 integrate_v_rspace                 119 11.3    0.002    0.003    4.627    4.635
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.744    4.596
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.341    4.381
 calculate_rho_elec                 119  8.7    0.021    0.024    4.340    4.381
 init_scf_run                        11  5.9    0.000    0.001    4.208    4.209
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    4.208    4.208
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.093    4.167
 apply_single                       119 13.6    0.000    0.000    3.093    4.166
 mp_sum_l                          7287 12.8    2.194    4.155    2.194    4.155
 rs_pw_transfer                     974 11.9    0.010    0.013    3.228    3.689
 qs_ot_p2m_diag                      50 11.0    0.009    0.013    3.185    3.203
 make_m2s                          4572 13.5    0.052    0.054    2.820    3.096
 density_rs2pw                      119  9.7    0.004    0.004    2.591    3.077
 make_images                       4572 14.5    0.201    0.237    2.730    3.004
 multiply_cannon_sync_h2d         27432 15.5    2.161    2.862    2.161    2.862
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.722    2.723
 init_scf_loop                       11  6.9    0.000    0.000    2.688    2.688
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.527    2.529
 ot_diis_step                       108 11.5    0.011    0.011    2.468    2.469
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.275    2.365
 potential_pw2rs                    119 12.3    0.006    0.006    2.329    2.352
 calculate_dm_sparse                119  9.5    0.000    0.001    2.223    2.300
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.287    2.290
 cp_fm_redistribute_end              50 14.0    1.159    2.251    1.163    2.255
 pw_transfer                       1439 11.6    0.066    0.071    2.195    2.250
 cp_fm_diag_elpa_base                50 14.0    1.058    2.145    1.087    2.187
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.103    2.160
 jit_kernel_multiply                 10 16.0    0.833    2.067    0.833    2.067
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.980    1.982
 grid_integrate_task_list           119 12.3    1.832    1.927    1.832    1.927
 make_images_data                  4572 15.5    0.045    0.051    1.256    1.777
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.723    1.762
 prepare_preconditioner              11  7.9    0.000    0.000    1.725    1.753
 make_preconditioner                 11  8.9    0.000    0.000    1.725    1.753
 fft3d_ps                          1201 14.6    0.524    0.580    1.654    1.706
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.617    1.678
 wfi_extrapolate                     11  7.9    0.001    0.001    1.615    1.615
 fft_wrap_pw1pw2_140                487 13.2    0.203    0.213    1.554    1.606
 hybrid_alltoall_any               4725 16.4    0.051    0.111    1.109    1.584
 mp_alltoall_d11v                  2130 13.8    1.388    1.571    1.388    1.571
 acc_transpose_blocks             27432 15.5    0.109    0.114    1.214    1.520
 mp_allgather_i34                  2286 14.5    0.675    1.499    0.675    1.499
 grid_collocate_task_list           119  9.7    1.272    1.422    1.272    1.422
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.412    1.421
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.265    1.317
 mp_sum_d                          4135 12.0    0.620    1.104    0.620    1.104
 rs_pw_transfer_RS2PW_140           130 11.5    0.140    0.148    0.564    1.032
 mp_waitany                        5720 13.7    0.546    1.032    0.546    1.032
 make_images_sizes                 4572 15.5    0.005    0.005    0.734    1.019
 mp_alltoall_i44                   4572 16.5    0.729    1.015    0.729    1.015
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.955    0.968
 rs_pw_transfer_PW2RS_50            119 14.3    0.593    0.612    0.858    0.950
 mp_alltoall_z22v                  1201 16.6    0.857    0.947    0.857    0.947
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    0.945    0.946
 acc_transpose_blocks_kernels     27432 16.5    0.186    0.278    0.665    0.877
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=41.276000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=466.000000, yerr=1.537412
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             521.547776E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63497.
 MP_Allreduce        10155                    305.
 MP_Sync                54
 MP_Alltoall          1821                1049484.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.032    0.050   43.631   43.637
 qs_mol_dyn_low                       1  2.0    0.003    0.003   43.023   43.033
 qs_forces                           11  3.9    0.002    0.002   42.955   42.957
 qs_energies                         11  4.9    0.003    0.015   41.108   41.113
 scf_env_do_scf                      11  5.9    0.001    0.003   35.562   35.564
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.011   31.771   31.773
 dbcsr_multiply_generic            2286 12.5    0.099    0.103   20.347   20.953
 velocity_verlet                     10  3.0    0.001    0.001   20.628   20.629
 qs_scf_new_mos                     108  7.5    0.001    0.001   20.575   20.611
 qs_scf_loop_do_ot                  108  8.5    0.001    0.002   20.574   20.611
 ot_scf_mini                        108  9.5    0.002    0.004   19.613   19.637
 multiply_cannon                   2286 13.5    0.199    0.205   14.577   16.020
 multiply_cannon_loop              2286 14.5    0.643    0.675   13.246   15.046
 ot_mini                            108 10.5    0.001    0.001   11.598   11.644
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.666    9.693
 rebuild_ks_matrix                  119  8.3    0.000    0.000    8.172    8.318
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    8.171    8.317
 mp_waitall_1                    158411 16.6    5.106    7.926    5.106    7.926
 qs_ks_update_qs_env                119  7.6    0.001    0.001    7.228    7.361
 multiply_cannon_multrec          18288 15.5    1.914    2.859    6.951    7.260
 dbcsr_mm_accdrv_process          38222 16.0    4.249    5.834    4.954    5.891
 sum_up_and_integrate               119 10.3    0.031    0.032    5.524    5.531
 integrate_v_rspace                 119 11.3    0.002    0.003    5.493    5.503
 qs_ot_get_p                        119 10.4    0.001    0.001    5.344    5.382
 qs_rho_update_rho_low              119  7.7    0.001    0.001    5.103    5.148
 calculate_rho_elec                 119  8.7    0.031    0.031    5.102    5.147
 rs_pw_transfer                     974 11.9    0.009    0.010    4.575    4.798
 multiply_cannon_metrocomm3       18288 15.5    0.044    0.046    2.097    4.709
 init_scf_run                        11  5.9    0.000    0.001    4.226    4.226
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    4.225    4.226
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.483    4.119
 init_scf_loop                       11  6.9    0.001    0.004    3.753    3.755
 density_rs2pw                      119  9.7    0.004    0.004    3.474    3.721
 qs_ot_p2m_diag                      50 11.0    0.012    0.013    3.480    3.498
 make_m2s                          4572 13.5    0.045    0.045    3.057    3.496
 make_images                       4572 14.5    0.193    0.205    2.971    3.409
 potential_pw2rs                    119 12.3    0.007    0.009    3.166    3.203
 cp_dbcsr_syevd                      50 12.0    0.003    0.004    3.073    3.074
 mp_sum_l                          7287 12.8    2.375    2.918    2.375    2.918
 prepare_preconditioner              11  7.9    0.000    0.001    2.761    2.765
 make_preconditioner                 11  8.9    0.000    0.002    2.761    2.765
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.256    2.710
 apply_single                       119 13.6    0.000    0.000    2.256    2.709
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.581    2.686
 pw_transfer                       1439 11.6    0.066    0.070    2.617    2.665
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.610    2.620
 cp_fm_diag_elpa_base                50 14.0    2.577    2.598    2.606    2.617
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.524    2.573
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.524    2.535
 calculate_first_density_matrix       1  7.0    0.000    0.003    2.476    2.477
 calculate_dm_sparse                119  9.5    0.000    0.000    2.140    2.158
 fft3d_ps                          1201 14.6    0.535    0.555    2.014    2.070
 jit_kernel_multiply                 10 16.0    0.653    1.997    0.653    1.997
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.988    1.990
 grid_integrate_task_list           119 12.3    1.791    1.916    1.791    1.916
 make_images_data                  4572 15.5    0.045    0.048    1.377    1.884
 ot_diis_step                       108 11.5    0.011    0.011    1.835    1.836
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.686    1.727
 fft_wrap_pw1pw2_140                487 13.2    0.250    0.258    1.677    1.716
 wfi_extrapolate                     11  7.9    0.001    0.001    1.669    1.669
 hybrid_alltoall_any               4725 16.4    0.055    0.114    1.161    1.656
 multiply_cannon_sync_h2d         18288 15.5    1.248    1.480    1.248    1.480
 mp_alltoall_d11v                  2130 13.8    1.209    1.437    1.209    1.437
 grid_collocate_task_list           119  9.7    1.247    1.399    1.247    1.399
 mp_sendrecv_dv                   11067 12.7    1.325    1.369    1.325    1.369
 mp_allgather_i34                  2286 14.5    0.783    1.356    0.783    1.356
 make_images_sizes                 4572 15.5    0.005    0.005    0.977    1.355
 acc_transpose_blocks             18288 15.5    0.076    0.079    1.314    1.350
 mp_alltoall_i44                   4572 16.5    0.972    1.350    0.972    1.350
 mp_alltoall_z22v                  1201 16.6    1.232    1.339    1.232    1.339
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.301    1.311
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.231    1.276
 cp_fm_cholesky_invert               11 10.9    1.241    1.249    1.241    1.249
 rs_pw_transfer_PW2RS_50            119 14.3    0.400    0.412    1.001    1.118
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.027    1.041
 rs_pw_transfer_RS2PW_50            119 11.7    0.241    0.250    0.902    0.997
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    0.989    0.991
 dbcsr_complete_redistribute        329 12.2    0.092    0.104    0.803    0.929
 mp_waitany                        9880 13.7    0.681    0.926    0.681    0.926
 mp_sum_d                          4135 12.0    0.717    0.909    0.717    0.909
 multiply_cannon_metrocomm1       18288 15.5    0.030    0.031    0.567    0.907
 acc_transpose_blocks_kernels     18288 16.5    0.218    0.228    0.858    0.882
 cp_fm_cholesky_decompose            22 10.9    0.871    0.875    0.871    0.875
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=43.637000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=496.272727, yerr=2.452861
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             550.502400E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63496.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.036   47.771   47.772
 qs_mol_dyn_low                       1  2.0    0.003    0.003   47.451   47.460
 qs_forces                           11  3.9    0.006    0.032   47.381   47.382
 qs_energies                         11  4.9    0.002    0.007   45.349   45.356
 scf_env_do_scf                      11  5.9    0.001    0.001   39.348   39.349
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   33.900   33.901
 velocity_verlet                     10  3.0    0.001    0.002   24.293   24.295
 qs_scf_new_mos                     108  7.5    0.001    0.001   22.214   22.286
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   22.214   22.285
 dbcsr_multiply_generic            2286 12.5    0.099    0.102   21.822   21.976
 ot_scf_mini                        108  9.5    0.003    0.004   21.077   21.138
 multiply_cannon                   2286 13.5    0.226    0.267   15.687   16.439
 multiply_cannon_loop              2286 14.5    0.954    0.983   14.183   14.675
 ot_mini                            108 10.5    0.001    0.001   11.944   12.025
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.810    9.870
 multiply_cannon_multrec          27432 15.5    2.349    3.028    8.632    8.942
 rebuild_ks_matrix                  119  8.3    0.000    0.000    8.555    8.628
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    8.554    8.627
 qs_ks_update_qs_env                119  7.6    0.001    0.001    7.640    7.709
 dbcsr_mm_accdrv_process          47916 15.9    5.440    7.090    6.192    7.380
 qs_ot_get_p                        119 10.4    0.001    0.001    5.974    6.077
 sum_up_and_integrate               119 10.3    0.036    0.039    5.412    5.418
 init_scf_loop                       11  6.9    0.001    0.006    5.399    5.401
 integrate_v_rspace                 119 11.3    0.003    0.003    5.376    5.383
 qs_rho_update_rho_low              119  7.7    0.001    0.001    5.265    5.296
 calculate_rho_elec                 119  8.7    0.040    0.046    5.264    5.295
 mp_waitall_1                    137007 16.6    4.464    5.279    4.464    5.279
 rs_pw_transfer                     974 11.9    0.009    0.010    4.355    4.525
 init_scf_run                        11  5.9    0.000    0.001    4.409    4.409
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    4.408    4.409
 prepare_preconditioner              11  7.9    0.000    0.001    4.300    4.309
 make_preconditioner                 11  8.9    0.000    0.002    4.300    4.309
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.811    4.198
 make_m2s                          4572 13.5    0.054    0.056    3.834    4.112
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.490    4.081
 make_images                       4572 14.5    0.271    0.333    3.725    3.999
 qs_ot_p2m_diag                      50 11.0    0.016    0.023    3.895    3.907
 density_rs2pw                      119  9.7    0.004    0.004    3.539    3.701
 cp_dbcsr_syevd                      50 12.0    0.003    0.004    3.451    3.452
 potential_pw2rs                    119 12.3    0.009    0.010    3.023    3.035
 pw_transfer                       1439 11.6    0.066    0.070    2.893    2.935
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.461    2.933
 apply_single                       119 13.6    0.000    0.000    2.461    2.933
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.901    2.910
 cp_fm_diag_elpa_base                50 14.0    2.843    2.865    2.896    2.905
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.801    2.846
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.764    2.798
 mp_sum_l                          7287 12.8    1.900    2.427    1.900    2.427
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.346    2.348
 calculate_dm_sparse                119  9.5    0.000    0.000    2.263    2.331
 fft3d_ps                          1201 14.6    0.561    0.609    2.241    2.278
 make_images_data                  4572 15.5    0.045    0.048    1.757    2.213
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.094    2.098
 ot_diis_step                       108 11.5    0.012    0.012    2.071    2.071
 fft_wrap_pw1pw2_140                487 13.2    0.287    0.301    2.015    2.061
 multiply_cannon_metrocomm3       27432 15.5    0.039    0.040    1.260    2.019
 wfi_extrapolate                     11  7.9    0.001    0.001    1.981    1.981
 hybrid_alltoall_any               4725 16.4    0.062    0.150    1.483    1.952
 grid_integrate_task_list           119 12.3    1.808    1.908    1.808    1.908
 jit_kernel_multiply                 10 15.9    0.689    1.841    0.689    1.841
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.697    1.731
 cp_fm_cholesky_invert               11 10.9    1.714    1.720    1.714    1.720
 dbcsr_complete_redistribute        329 12.2    0.160    0.199    1.371    1.682
 mp_alltoall_z22v                  1201 16.6    1.563    1.597    1.563    1.597
 mp_alltoall_d11v                  2130 13.8    1.436    1.581    1.436    1.581
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.488    1.516
 acc_transpose_blocks             27432 15.5    0.111    0.113    1.483    1.505
 make_images_sizes                 4572 15.5    0.005    0.005    1.056    1.481
 mp_alltoall_i44                   4572 16.5    1.051    1.476    1.051    1.476
 grid_collocate_task_list           119  9.7    1.250    1.372    1.250    1.372
 cp_fm_upper_to_full                 72 14.2    0.929    1.356    0.929    1.356
 mp_sendrecv_dv                    8211 12.7    1.333    1.352    1.333    1.352
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.296    1.308
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.904    1.210
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    1.168    1.169
 cp_fm_cholesky_decompose            22 10.9    1.042    1.048    1.042    1.048
 mp_allgather_i34                  2286 14.5    0.793    1.029    0.793    1.029
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.015    1.023
 multiply_cannon_sync_h2d         27432 15.5    0.926    0.994    0.926    0.994
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=47.772000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=521.818182, yerr=4.281422
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             600.850432E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63495.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.029   37.130   37.130
 qs_mol_dyn_low                       1  2.0    0.003    0.003   36.888   36.896
 qs_forces                           11  3.9    0.002    0.003   36.822   36.823
 qs_energies                         11  4.9    0.001    0.002   34.911   34.915
 scf_env_do_scf                      11  5.9    0.001    0.002   29.324   29.324
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   25.627   25.628
 velocity_verlet                     10  3.0    0.001    0.001   19.084   19.087
 qs_scf_new_mos                     108  7.5    0.001    0.001   15.166   15.212
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   15.165   15.211
 dbcsr_multiply_generic            2286 12.5    0.092    0.098   15.043   15.177
 ot_scf_mini                        108  9.5    0.002    0.002   14.331   14.376
 multiply_cannon                   2286 13.5    0.233    0.242   10.861   11.594
 multiply_cannon_loop              2286 14.5    0.334    0.346    9.720    9.978
 ot_mini                            108 10.5    0.001    0.001    7.526    7.574
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.483    7.537
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    7.483    7.537
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.679    6.727
 multiply_cannon_multrec           9144 15.5    1.635    1.848    6.008    6.208
 qs_ot_get_derivative               108 11.5    0.001    0.001    6.025    6.067
 sum_up_and_integrate               119 10.3    0.038    0.041    4.899    4.906
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.885    4.905
 calculate_rho_elec                 119  8.7    0.060    0.061    4.885    4.904
 integrate_v_rspace                 119 11.3    0.003    0.003    4.861    4.869
 qs_ot_get_p                        119 10.4    0.001    0.001    4.622    4.671
 dbcsr_mm_accdrv_process          12550 15.8    3.366    4.283    4.272    4.344
 mp_waitall_1                    115863 16.7    3.595    4.166    3.595    4.166
 init_scf_run                        11  5.9    0.000    0.001    3.999    3.999
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.998    3.998
 init_scf_loop                       11  6.9    0.000    0.000    3.657    3.658
 rs_pw_transfer                     974 11.9    0.008    0.008    3.241    3.387
 density_rs2pw                      119  9.7    0.004    0.004    3.031    3.183
 make_m2s                          4572 13.5    0.035    0.036    2.941    3.141
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    3.076    3.078
 make_images                       4572 14.5    0.268    0.302    2.850    3.049
 pw_transfer                       1439 11.6    0.066    0.070    2.806    2.818
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.776    2.777
 prepare_preconditioner              11  7.9    0.000    0.000    2.734    2.740
 make_preconditioner                 11  8.9    0.000    0.000    2.734    2.740
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.712    2.725
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.583    2.635
 potential_pw2rs                    119 12.3    0.010    0.011    2.424    2.432
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.362    2.363
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.270    2.278
 cp_fm_diag_elpa_base                50 14.0    2.232    2.257    2.267    2.275
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.145    2.163
 fft3d_ps                          1201 14.6    0.568    0.580    2.045    2.063
 grid_integrate_task_list           119 12.3    1.911    2.048    1.911    2.048
 fft_wrap_pw1pw2_140                487 13.2    0.364    0.375    2.000    2.014
 calculate_dm_sparse                119  9.5    0.000    0.000    1.943    1.975
 make_images_data                  4572 15.5    0.039    0.042    1.449    1.849
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.819    1.822
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.757    1.781
 jit_kernel_multiply                  9 15.5    0.868    1.763    0.868    1.763
 hybrid_alltoall_any               4725 16.4    0.063    0.176    1.315    1.744
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.571    1.586
 wfi_extrapolate                     11  7.9    0.001    0.001    1.550    1.551
 grid_collocate_task_list           119  9.7    1.357    1.485    1.357    1.485
 ot_diis_step                       108 11.5    0.012    0.013    1.465    1.467
 cp_fm_cholesky_invert               11 10.9    1.443    1.447    1.443    1.447
 mp_alltoall_d11v                  2130 13.8    1.267    1.399    1.267    1.399
 mp_alltoall_z22v                  1201 16.6    1.350    1.383    1.350    1.383
 make_images_sizes                 4572 15.5    0.005    0.005    0.827    1.361
 mp_alltoall_i44                   4572 16.5    0.823    1.357    0.823    1.357
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.301    1.346
 apply_single                       119 13.6    0.000    0.000    1.301    1.346
 multiply_cannon_metrocomm1        9144 15.5    0.022    0.023    1.129    1.279
 qs_energies_init_hamiltonians       11  5.9    0.010    0.013    1.256    1.257
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.237    1.246
 mp_allgather_i34                  2286 14.5    0.466    1.087    0.466    1.087
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.967    0.981
 mp_sum_l                          7287 12.8    0.818    0.979    0.818    0.979
 acc_transpose_blocks              9144 15.5    0.038    0.039    0.961    0.967
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.933    0.939
 mp_sendrecv_dv                    5355 12.7    0.909    0.922    0.909    0.922
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.865    0.918
 dbcsr_complete_redistribute        329 12.2    0.160    0.170    0.846    0.896
 cp_fm_cholesky_decompose            22 10.9    0.827    0.830    0.827    0.830
 yz_to_x                            606 15.1    0.053    0.055    0.780    0.801
 arnoldi_extremal                   119 11.4    0.002    0.002    0.766    0.787
 arnoldi_normal_ev                  119 12.4    0.002    0.002    0.764    0.785
 multiply_cannon_sync_h2d          9144 15.5    0.694    0.753    0.694    0.753
 qs_env_update_s_mstruct             11  6.9    0.001    0.002    0.702    0.752
 copy_dbcsr_to_fm                   153 11.3    0.002    0.002    0.726    0.743
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=37.130000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=568.909091, yerr=5.743843
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             741.838848E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63729.
 MP_Allreduce        10154                    429.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.034   43.270   43.271
 qs_mol_dyn_low                       1  2.0    0.003    0.003   43.036   43.043
 qs_forces                           11  3.9    0.002    0.002   42.972   42.973
 qs_energies                         11  4.9    0.002    0.002   40.931   40.933
 scf_env_do_scf                      11  5.9    0.001    0.001   35.051   35.051
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.006   27.065   27.066
 velocity_verlet                     10  3.0    0.001    0.001   24.247   24.253
 dbcsr_multiply_generic            2286 12.5    0.099    0.102   18.169   18.384
 qs_scf_new_mos                     108  7.5    0.001    0.001   16.809   16.907
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   16.808   16.906
 ot_scf_mini                        108  9.5    0.002    0.002   15.693   15.788
 multiply_cannon                   2286 13.5    0.303    0.317   13.885   14.889
 multiply_cannon_loop              2286 14.5    0.344    0.352   12.519   13.437
 ot_mini                            108 10.5    0.001    0.001    9.230    9.342
 multiply_cannon_multrec           9144 15.5    3.444    4.799    8.739    8.878
 init_scf_loop                       11  6.9    0.000    0.000    7.953    7.956
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.592    7.726
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    7.591    7.726
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.160    7.257
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.863    6.985
 prepare_preconditioner              11  7.9    0.000    0.000    6.956    6.969
 make_preconditioner                 11  8.9    0.000    0.000    6.956    6.969
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.551    6.842
 dbcsr_mm_accdrv_process          12550 15.8    4.411    6.247    5.172    6.540
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.516    4.523
 calculate_rho_elec                 119  8.7    0.118    0.121    4.515    4.522
 cp_fm_upper_to_full                 72 14.2    3.172    4.518    3.172    4.518
 sum_up_and_integrate               119 10.3    0.064    0.066    4.215    4.222
 integrate_v_rspace                 119 11.3    0.003    0.004    4.151    4.157
 init_scf_run                        11  5.9    0.000    0.001    3.835    3.835
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.835    3.835
 qs_ot_get_p                        119 10.4    0.001    0.001    3.630    3.765
 mp_waitall_1                     94719 16.7    2.654    3.662    2.654    3.662
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.592    3.027
 pw_transfer                       1439 11.6    0.068    0.069    2.863    2.870
 make_m2s                          4572 13.5    0.038    0.038    2.618    2.867
 fft_wrap_pw1pw2                   1201 12.6    0.009    0.009    2.764    2.771
 dbcsr_complete_redistribute        329 12.2    0.285    0.287    1.989    2.763
 make_images                       4572 14.5    0.351    0.382    2.495    2.744
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.244    2.507
 apply_single                       119 13.6    0.000    0.000    2.244    2.507
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.655    2.433
 density_rs2pw                      119  9.7    0.004    0.004    2.375    2.388
 calculate_dm_sparse                119  9.5    0.000    0.000    2.346    2.363
 fft_wrap_pw1pw2_140                487 13.2    0.618    0.624    2.283    2.292
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.270    2.271
 qs_ot_p2m_diag                      50 11.0    0.043    0.044    2.236    2.238
 mp_alltoall_i22                    627 13.8    1.391    2.203    1.391    2.203
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.020    1.264    2.178
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.400    2.172
 grid_integrate_task_list           119 12.3    2.074    2.105    2.074    2.105
 ot_diis_step                       108 11.5    0.014    0.014    2.048    2.049
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.945    1.946
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.826    1.877
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.843    1.844
 fft3d_ps                          1201 14.6    0.598    0.608    1.780    1.786
 mp_sum_l                          7287 12.8    1.132    1.780    1.132    1.780
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.760    1.761
 rs_pw_transfer                     974 11.9    0.009    0.009    1.671    1.699
 hybrid_alltoall_any               4725 16.4    0.087    0.149    1.294    1.635
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.608    1.608
 cp_fm_diag_elpa_base                50 14.0    1.455    1.512    1.605    1.606
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.566    1.603
 cp_fm_cholesky_invert               11 10.9    1.587    1.590    1.587    1.590
 make_images_data                  4572 15.5    0.042    0.045    1.279    1.587
 potential_pw2rs                    119 12.3    0.014    0.015    1.558    1.562
 grid_collocate_task_list           119  9.7    1.519    1.535    1.519    1.535
 wfi_extrapolate                     11  7.9    0.001    0.001    1.493    1.493
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.385    1.392
 mp_alltoall_d11v                  2130 13.8    1.205    1.218    1.205    1.218
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.170    1.187
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    1.099    1.117
 mp_alltoall_z22v                  1201 16.6    1.046    1.061    1.046    1.061
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.987    1.059
 multiply_cannon_sync_h2d          9144 15.5    1.042    1.046    1.042    1.046
 jit_kernel_multiply                  6 15.5    0.733    1.025    0.733    1.025
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.944    0.956
 qs_create_task_list                 11  7.9    0.000    0.000    0.935    0.947
 generate_qs_task_list               11  8.9    0.367    0.386    0.935    0.947
 acc_transpose_blocks              9144 15.5    0.038    0.039    0.932    0.938
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=43.271000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=696.818182, yerr=12.626222
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             502.272000E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66218.
 MP_Allreduce         9776                    488.
 MP_Sync                52
 MP_Alltoall          1938                1460355.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.031   98.138   98.139
 qs_mol_dyn_low                       1  2.0    0.003    0.003   97.854   97.864
 qs_forces                           11  3.9    0.003    0.003   97.780   97.782
 qs_energies                         11  4.9    0.005    0.006   94.575   94.596
 scf_env_do_scf                      11  5.9    0.001    0.001   84.417   84.420
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   76.470   76.471
 qs_scf_new_mos                      99  7.5    0.000    0.001   54.965   55.155
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   54.965   55.154
 dbcsr_multiply_generic            2055 12.4    0.107    0.113   54.268   54.575
 ot_scf_mini                         99  9.5    0.002    0.002   52.395   52.497
 velocity_verlet                     10  3.0    0.001    0.002   50.468   50.469
 multiply_cannon                   2055 13.4    0.181    0.185   43.262   44.332
 multiply_cannon_loop              2055 14.4    1.533    1.562   41.740   42.898
 ot_mini                             99 10.5    0.001    0.001   28.822   28.942
 qs_ot_get_derivative                99 11.5    0.001    0.001   21.956   22.097
 multiply_cannon_multrec          49320 15.4   12.168   12.836   17.125   17.836
 rebuild_ks_matrix                  110  8.3    0.000    0.000   17.014   17.207
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.016   17.014   17.207
 qs_ks_update_qs_env                110  7.6    0.001    0.001   14.949   15.118
 qs_ot_get_p                        110 10.4    0.001    0.001   14.520   14.648
 mp_waitall_1                    220248 16.4   13.327   14.400   13.327   14.400
 multiply_cannon_sync_h2d         49320 15.4    9.964   10.395    9.964   10.395
 qs_ot_p2m_diag                      48 11.0    0.013    0.019   10.325   10.393
 qs_rho_update_rho_low              110  7.6    0.001    0.001    9.084    9.216
 calculate_rho_elec                 110  8.6    0.022    0.026    9.083    9.216
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    9.163    9.164
 sum_up_and_integrate               110 10.3    0.036    0.043    9.044    9.056
 integrate_v_rspace                 110 11.3    0.003    0.004    9.007    9.029
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    8.110    8.663
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    8.144    8.166
 cp_fm_diag_elpa_base                48 14.0    8.115    8.143    8.141    8.164
 multiply_cannon_metrocomm3       49320 15.4    0.079    0.082    6.670    8.088
 init_scf_loop                       11  6.9    0.000    0.000    7.898    7.899
 init_scf_run                        11  5.9    0.000    0.001    7.844    7.844
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    7.844    7.844
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.281    7.754
 apply_single                       110 13.6    0.000    0.001    7.281    7.754
 rs_pw_transfer                     902 11.9    0.011    0.013    5.950    6.655
 ot_diis_step                        99 11.5    0.005    0.006    6.648    6.649
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    6.435    6.482
 density_rs2pw                      110  9.6    0.004    0.005    5.748    6.311
 make_m2s                          4110 13.4    0.064    0.067    5.741    5.884
 make_images                       4110 14.4    0.179    0.194    5.638    5.782
 mp_sum_l                          6594 12.7    4.811    5.777    4.811    5.777
 pw_transfer                       1331 11.6    0.057    0.066    5.462    5.635
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.009    5.372    5.543
 prepare_preconditioner              11  7.9    0.000    0.000    5.448    5.465
 make_preconditioner                 11  8.9    0.000    0.000    5.448    5.465
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.183    5.245
 dbcsr_mm_accdrv_process          87628 16.1    2.001    2.071    4.833    5.131
 wfi_extrapolate                     11  7.9    0.001    0.001    5.034    5.035
 fft3d_ps                          1111 14.6    0.786    0.889    4.569    4.722
 potential_pw2rs                    110 12.3    0.006    0.007    4.437    4.527
 fft_wrap_pw1pw2_140                451 13.1    0.446    0.497    4.194    4.371
 multiply_cannon_metrocomm1       49320 15.4    0.065    0.068    3.190    4.203
 calculate_dm_sparse                110  9.5    0.001    0.001    3.962    4.080
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.796    3.803
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    3.672    3.733
 grid_integrate_task_list           110 12.3    3.216    3.489    3.216    3.489
 mp_alltoall_z22v                  1111 16.6    3.212    3.436    3.212    3.436
 mp_alltoall_d11v                  2046 13.8    2.681    3.318    2.681    3.318
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.272    3.315
 cp_fm_cholesky_invert               11 10.9    3.006    3.012    3.006    3.012
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.669    2.674
 make_images_data                  4110 15.4    0.044    0.047    2.376    2.650
 mp_waitany                       14300 13.8    2.036    2.623    2.036    2.623
 jit_kernel_multiply                 13 15.9    2.552    2.587    2.552    2.587
 make_images_sizes                 4110 15.4    0.004    0.004    1.834    2.414
 mp_alltoall_i44                   4110 16.4    1.830    2.410    1.830    2.410
 hybrid_alltoall_any               4261 16.3    0.082    0.476    2.010    2.362
 grid_collocate_task_list           110  9.6    2.154    2.356    2.154    2.356
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    2.201    2.225
 acc_transpose_blocks             49320 15.4    0.211    0.219    2.122    2.169
 mp_sum_d                          3889 11.9    1.530    2.044    1.530    2.044
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=98.139000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=476.818182, yerr=3.214122
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             587.976704E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66437.
 MP_Allreduce         9775                    566.
 MP_Sync                52
 MP_Alltoall          1717                2505149.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.020    0.033   93.146   93.148
 qs_mol_dyn_low                       1  2.0    0.003    0.003   92.828   92.838
 qs_forces                           11  3.9    0.003    0.003   92.749   92.750
 qs_energies                         11  4.9    0.005    0.034   89.016   89.038
 scf_env_do_scf                      11  5.9    0.001    0.001   79.068   79.072
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   65.903   65.904
 velocity_verlet                     10  3.0    0.002    0.009   48.271   48.274
 qs_scf_new_mos                      99  7.5    0.001    0.001   45.465   45.610
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   45.464   45.610
 dbcsr_multiply_generic            2055 12.4    0.119    0.123   44.591   45.373
 ot_scf_mini                         99  9.5    0.003    0.003   43.407   43.568
 multiply_cannon                   2055 13.4    0.223    0.247   34.157   37.049
 multiply_cannon_loop              2055 14.4    0.929    0.954   31.918   34.134
 ot_mini                             99 10.5    0.001    0.001   23.644   23.761
 mp_waitall_1                    176588 16.5   13.021   18.259   13.021   18.259
 qs_ot_get_derivative                99 11.5    0.001    0.001   17.191   17.332
 rebuild_ks_matrix                  110  8.3    0.000    0.000   16.237   16.345
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.016   16.237   16.345
 multiply_cannon_multrec          24660 15.4    6.951    8.981   13.173   15.154
 qs_ks_update_qs_env                110  7.6    0.001    0.001   14.296   14.390
 multiply_cannon_metrocomm3       24660 15.4    0.068    0.071    8.296   13.235
 init_scf_loop                       11  6.9    0.001    0.005   13.105   13.107
 qs_ot_get_p                        110 10.4    0.001    0.001   12.014   12.152
 prepare_preconditioner              11  7.9    0.000    0.001   10.771   10.788
 make_preconditioner                 11  8.9    0.000    0.000   10.771   10.788
 make_full_inverse_cholesky          11  9.9    0.000    0.000   10.256   10.451
 qs_ot_p2m_diag                      48 11.0    0.029    0.044    8.941    8.978
 sum_up_and_integrate               110 10.3    0.052    0.060    8.712    8.723
 integrate_v_rspace                 110 11.3    0.002    0.003    8.660    8.671
 qs_rho_update_rho_low              110  7.6    0.001    0.001    8.460    8.485
 calculate_rho_elec                 110  8.6    0.040    0.048    8.459    8.484
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    8.227    8.230
 multiply_cannon_sync_h2d         24660 15.4    6.471    7.826    6.471    7.826
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.822    7.353
 apply_single                       110 13.6    0.000    0.001    6.822    7.353
 init_scf_run                        11  5.9    0.000    0.001    7.186    7.186
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    7.186    7.186
 make_m2s                          4110 13.4    0.057    0.060    6.347    7.137
 make_images                       4110 14.4    0.401    0.447    6.235    7.021
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    7.007    7.018
 cp_fm_diag_elpa_base                48 14.0    6.926    6.957    7.003    7.014
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    5.934    6.652
 rs_pw_transfer                     902 11.9    0.015    0.042    5.534    6.427
 dbcsr_mm_accdrv_process          52282 16.1    4.592    5.498    6.079    6.392
 density_rs2pw                      110  9.6    0.004    0.005    5.422    6.260
 ot_diis_step                        99 11.5    0.010    0.010    6.234    6.234
 cp_fm_cholesky_invert               11 10.9    5.848    5.860    5.848    5.860
 pw_transfer                       1331 11.6    0.068    0.074    5.286    5.465
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.009    5.177    5.360
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    5.096    5.160
 mp_sum_l                          6594 12.7    3.481    4.771    3.481    4.771
 wfi_extrapolate                     11  7.9    0.001    0.001    4.490    4.490
 potential_pw2rs                    110 12.3    0.008    0.009    4.352    4.394
 fft3d_ps                          1111 14.6    1.110    1.337    4.157    4.340
 make_images_data                  4110 15.4    0.047    0.051    3.420    4.338
 hybrid_alltoall_any               4261 16.3    0.102    0.445    2.942    3.900
 fft_wrap_pw1pw2_140                451 13.1    0.513    0.532    3.404    3.583
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.514    3.517
 calculate_dm_sparse                110  9.5    0.001    0.001    3.416    3.492
 grid_integrate_task_list           110 12.3    3.121    3.386    3.121    3.386
 cp_fm_cholesky_decompose            22 10.9    3.143    3.151    3.143    3.151
 mp_alltoall_d11v                  2046 13.8    2.596    3.132    2.596    3.132
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.000    3.033
 mp_allgather_i34                  2055 14.4    1.557    2.979    1.557    2.979
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    2.829    2.922
 mp_alltoall_z22v                  1111 16.6    2.628    2.736    2.628    2.736
 calculate_first_density_matrix       1  7.0    0.002    0.015    2.565    2.569
 grid_collocate_task_list           110  9.6    2.094    2.546    2.094    2.546
 mp_waitany                       10164 13.8    1.720    2.497    1.720    2.497
 multiply_cannon_metrocomm4       22605 15.4    0.077    0.083    0.873    2.371
 make_images_sizes                 4110 15.4    0.005    0.005    1.564    2.157
 mp_alltoall_i44                   4110 16.4    1.559    2.152    1.559    2.152
 mp_sum_d                          3889 11.9    1.510    2.148    1.510    2.148
 mp_irecv_dv                      57340 16.2    0.748    2.141    0.748    2.141
 jit_kernel_multiply                 11 16.2    1.133    2.133    1.133    2.133
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    2.111    2.123
 dbcsr_complete_redistribute        325 12.2    0.226    0.286    1.793    2.121
 rs_pw_transfer_RS2PW_140           121 11.5    0.208    0.220    1.147    2.001
 qs_energies_init_hamiltonians       11  5.9    0.001    0.003    1.966    1.968
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=93.148000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=556.545455, yerr=6.344126
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             660.340736E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66428.
 MP_Allreduce         9774                    562.
 MP_Sync                52
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.022    0.033   71.845   71.846
 qs_mol_dyn_low                       1  2.0    0.003    0.003   71.507   71.517
 qs_forces                           11  3.9    0.003    0.003   71.316   71.318
 qs_energies                         11  4.9    0.001    0.002   67.892   67.896
 scf_env_do_scf                      11  5.9    0.000    0.001   59.129   59.129
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   47.790   47.791
 velocity_verlet                     10  3.0    0.001    0.001   38.863   38.869
 dbcsr_multiply_generic            2055 12.4    0.107    0.112   30.788   31.031
 qs_scf_new_mos                      99  7.5    0.001    0.001   29.990   30.096
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   29.989   30.096
 ot_scf_mini                         99  9.5    0.002    0.002   28.680   28.800
 multiply_cannon                   2055 13.4    0.215    0.226   22.768   24.111
 multiply_cannon_loop              2055 14.4    0.618    0.632   21.223   22.213
 ot_mini                             99 10.5    0.001    0.001   15.289   15.403
 rebuild_ks_matrix                  110  8.3    0.000    0.000   14.050   14.183
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   14.050   14.182
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.394   12.513
 mp_waitall_1                    139946 16.5    8.634   11.757    8.634   11.757
 init_scf_loop                       11  6.9    0.000    0.000   11.285   11.286
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.769   10.889
 multiply_cannon_multrec          16440 15.4    3.972    4.992    9.736   10.727
 prepare_preconditioner              11  7.9    0.000    0.000    9.307    9.325
 make_preconditioner                 11  8.9    0.000    0.000    9.307    9.325
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.501    8.943
 qs_ot_get_p                        110 10.4    0.001    0.001    8.265    8.394
 sum_up_and_integrate               110 10.3    0.060    0.062    7.900    7.916
 integrate_v_rspace                 110 11.3    0.003    0.003    7.840    7.856
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.546    7.563
 calculate_rho_elec                 110  8.6    0.059    0.060    7.545    7.562
 multiply_cannon_metrocomm3       16440 15.4    0.042    0.045    4.283    7.380
 init_scf_run                        11  5.9    0.000    0.001    6.223    6.224
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    6.223    6.223
 make_m2s                          4110 13.4    0.050    0.051    5.625    6.160
 qs_ot_p2m_diag                      48 11.0    0.042    0.044    6.083    6.093
 make_images                       4110 14.4    0.394    0.513    5.508    6.042
 dbcsr_mm_accdrv_process          34862 16.1    4.649    5.318    5.618    5.738
 density_rs2pw                      110  9.6    0.004    0.005    4.452    5.736
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    5.637    5.638
 rs_pw_transfer                     902 11.9    0.010    0.012    4.358    5.599
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    5.028    5.456
 apply_single                       110 13.6    0.000    0.000    5.028    5.456
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    4.851    4.859
 cp_fm_diag_elpa_base                48 14.0    4.759    4.805    4.848    4.856
 pw_transfer                       1331 11.6    0.066    0.075    4.606    4.659
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.009    4.498    4.552
 ot_diis_step                        99 11.5    0.011    0.011    4.485    4.486
 multiply_cannon_sync_h2d         16440 15.4    3.646    4.228    3.646    4.228
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.574    4.214
 cp_fm_cholesky_invert               11 10.9    4.205    4.214    4.205    4.214
 make_images_data                  4110 15.4    0.043    0.047    3.098    3.963
 hybrid_alltoall_any               4261 16.3    0.106    0.379    2.681    3.811
 wfi_extrapolate                     11  7.9    0.001    0.001    3.620    3.620
 fft_wrap_pw1pw2_140                451 13.1    0.632    0.646    3.515    3.567
 potential_pw2rs                    110 12.3    0.011    0.012    3.443    3.491
 grid_integrate_task_list           110 12.3    3.153    3.463    3.153    3.463
 fft3d_ps                          1111 14.6    1.094    1.104    3.325    3.378
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.022    3.088
 mp_alltoall_d11v                  2046 13.8    2.469    3.017    2.469    3.017
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.952    2.955
 mp_waitany                       17072 13.8    1.430    2.794    1.430    2.794
 calculate_dm_sparse                110  9.5    0.001    0.001    2.620    2.653
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.511    2.566
 grid_collocate_task_list           110  9.6    2.124    2.511    2.124    2.511
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.479    2.480
 mp_sum_l                          6594 12.7    1.782    2.452    1.782    2.452
 dbcsr_complete_redistribute        325 12.2    0.309    0.340    1.891    2.419
 cp_fm_cholesky_decompose            22 10.9    2.378    2.398    2.378    2.398
 multiply_cannon_metrocomm4       14385 15.4    0.044    0.047    0.863    2.387
 rs_pw_transfer_RS2PW_140           121 11.5    0.176    0.181    0.970    2.285
 mp_irecv_dv                      48980 15.7    0.796    2.263    0.796    2.263
 jit_kernel_multiply                 12 16.4    0.579    2.258    0.579    2.258
 mp_alltoall_z22v                  1111 16.6    2.020    2.070    2.020    2.070
 mp_allgather_i34                  2055 14.4    0.857    2.031    0.857    2.031
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.985    1.986
 cp_fm_upper_to_full                 70 14.2    1.457    1.945    1.457    1.945
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.855    1.870
 make_images_sizes                 4110 15.4    0.005    0.005    1.177    1.846
 mp_alltoall_i44                   4110 16.4    1.172    1.841    1.172    1.841
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    1.266    1.775
 rs_gather_matrices                 110 12.3    0.232    0.264    1.153    1.659
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.524    1.534
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.354    1.451
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.416    1.438
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=71.846000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=625.000000, yerr=7.942979
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             732.385280E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66426.
 MP_Allreduce         9774                    603.
 MP_Sync                52
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.041    0.059   75.532   75.534
 qs_mol_dyn_low                       1  2.0    0.003    0.003   75.175   75.186
 qs_forces                           11  3.9    0.003    0.003   74.973   74.974
 qs_energies                         11  4.9    0.007    0.032   71.377   71.387
 scf_env_do_scf                      11  5.9    0.001    0.001   61.906   61.908
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   48.154   48.155
 velocity_verlet                     10  3.0    0.001    0.001   41.689   41.696
 dbcsr_multiply_generic            2055 12.4    0.113    0.118   31.773   32.012
 qs_scf_new_mos                      99  7.5    0.001    0.001   30.894   30.998
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   30.894   30.998
 ot_scf_mini                         99  9.5    0.003    0.003   29.194   29.303
 multiply_cannon                   2055 13.4    0.243    0.264   23.099   24.347
 multiply_cannon_loop              2055 14.4    0.892    0.910   21.574   22.173
 ot_mini                             99 10.5    0.001    0.001   15.696   15.832
 multiply_cannon_multrec          24660 15.4    4.244    6.909   12.896   14.236
 init_scf_loop                       11  6.9    0.001    0.005   13.692   13.695
 rebuild_ks_matrix                  110  8.3    0.000    0.000   13.536   13.636
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.017   13.536   13.636
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.974   12.062
 prepare_preconditioner              11  7.9    0.000    0.001   11.777   11.797
 make_preconditioner                 11  8.9    0.001    0.002   11.777   11.797
 qs_ot_get_derivative                99 11.5    0.001    0.001   11.466   11.585
 make_full_inverse_cholesky          11  9.9    0.000    0.000    9.932   11.424
 dbcsr_mm_accdrv_process          52304 16.0    7.094    8.660    8.505    9.400
 qs_ot_get_p                        110 10.4    0.001    0.002    8.225    8.382
 mp_waitall_1                    121746 16.5    5.975    8.091    5.975    8.091
 sum_up_and_integrate               110 10.3    0.068    0.071    7.585    7.599
 integrate_v_rspace                 110 11.3    0.003    0.003    7.517    7.531
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.355    7.365
 calculate_rho_elec                 110  8.6    0.078    0.082    7.355    7.365
 make_m2s                          4110 13.4    0.059    0.062    6.845    7.144
 make_images                       4110 14.4    0.577    0.698    6.703    7.003
 init_scf_run                        11  5.9    0.000    0.001    6.435    6.435
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    6.434    6.435
 qs_ot_p2m_diag                      48 11.0    0.055    0.064    5.946    5.961
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    5.377    5.377
 cp_fm_upper_to_full                 70 14.2    3.461    5.017    3.461    5.017
 density_rs2pw                      110  9.6    0.004    0.004    4.108    4.623
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    4.599    4.608
 cp_fm_diag_elpa_base                48 14.0    4.408    4.478    4.596    4.605
 pw_transfer                       1331 11.6    0.066    0.077    4.563    4.600
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.009    4.456    4.497
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.097    4.205
 apply_single                       110 13.6    0.000    0.000    4.096    4.205
 ot_diis_step                        99 11.5    0.011    0.011    4.186    4.186
 rs_pw_transfer                     902 11.9    0.010    0.011    3.708    4.175
 dbcsr_complete_redistribute        325 12.2    0.413    0.466    2.994    4.161
 make_images_data                  4110 15.4    0.046    0.050    3.618    4.108
 hybrid_alltoall_any               4261 16.3    0.120    0.457    2.978    3.888
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.728    3.791
 cp_fm_cholesky_invert               11 10.9    3.767    3.779    3.767    3.779
 fft_wrap_pw1pw2_140                451 13.1    0.666    0.686    3.559    3.603
 qs_ot_get_derivative_diag           47 12.0    0.001    0.002    3.534    3.591
 copy_fm_to_dbcsr                   174 11.2    0.001    0.003    2.346    3.497
 wfi_extrapolate                     11  7.9    0.001    0.001    3.479    3.479
 grid_integrate_task_list           110 12.3    3.232    3.433    3.232    3.433
 multiply_cannon_sync_h2d         24660 15.4    3.140    3.262    3.140    3.262
 multiply_cannon_metrocomm3       24660 15.4    0.037    0.038    1.477    3.254
 fft3d_ps                          1111 14.6    1.090    1.121    3.221    3.242
 calculate_dm_sparse                110  9.5    0.001    0.001    3.127    3.156
 potential_pw2rs                    110 12.3    0.013    0.014    3.070    3.085
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.066    3.068
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.835    2.972
 mp_alltoall_i22                    605 13.7    1.734    2.925    1.734    2.925
 calculate_first_density_matrix       1  7.0    0.001    0.006    2.831    2.833
 mp_alltoall_d11v                  2046 13.8    2.356    2.690    2.356    2.690
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.583    2.622
 grid_collocate_task_list           110  9.6    2.221    2.503    2.221    2.503
 qs_energies_init_hamiltonians       11  5.9    0.002    0.006    2.369    2.371
 cp_fm_cholesky_decompose            22 10.9    2.316    2.365    2.316    2.365
 mp_alltoall_z22v                  1111 16.6    1.914    1.979    1.914    1.979
 jit_kernel_multiply                 10 15.7    1.076    1.975    1.076    1.975
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.845    1.882
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.816    1.826
 mp_waitany                       13376 13.8    1.323    1.790    1.323    1.790
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.694    1.709
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.599    1.696
 mp_sum_l                          6594 12.7    1.147    1.665    1.147    1.665
 make_images_sizes                 4110 15.4    0.005    0.005    0.959    1.649
 mp_alltoall_i44                   4110 16.4    0.955    1.644    0.955    1.644
 mp_allgather_i34                  2055 14.4    0.621    1.624    0.621    1.624
 acc_transpose_blocks             24660 15.4    0.104    0.106    1.592    1.615
 multiply_cannon_metrocomm4       20550 15.4    0.059    0.062    0.844    1.589
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=75.534000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=693.818182, yerr=6.873112
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             836.792320E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66424.
 MP_Allreduce         9774                    644.
 MP_Sync                52
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.032   69.295   69.296
 qs_mol_dyn_low                       1  2.0    0.003    0.003   68.959   68.969
 qs_forces                           11  3.9    0.003    0.003   68.885   68.886
 qs_energies                         11  4.9    0.001    0.002   64.934   64.941
 scf_env_do_scf                      11  5.9    0.000    0.001   55.561   55.561
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   44.758   44.759
 velocity_verlet                     10  3.0    0.001    0.001   38.137   38.141
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.129   27.208
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   27.128   27.207
 dbcsr_multiply_generic            2055 12.4    0.106    0.111   26.514   26.809
 ot_scf_mini                         99  9.5    0.002    0.002   25.658   25.712
 multiply_cannon                   2055 13.4    0.249    0.264   18.946   20.845
 multiply_cannon_loop              2055 14.4    0.325    0.337   17.302   18.005
 rebuild_ks_matrix                  110  8.3    0.000    0.000   13.521   13.545
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.013   13.521   13.544
 ot_mini                             99 10.5    0.001    0.001   12.892   12.907
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.970   11.989
 init_scf_loop                       11  6.9    0.000    0.000   10.738   10.740
 mp_waitall_1                    103326 16.6    8.405   10.230    8.405   10.230
 qs_ot_get_derivative                99 11.5    0.001    0.001    8.954    9.007
 prepare_preconditioner              11  7.9    0.000    0.000    8.875    8.878
 make_preconditioner                 11  8.9    0.000    0.000    8.875    8.878
 multiply_cannon_multrec           8220 15.4    3.227    4.486    7.636    8.619
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.319    8.477
 qs_ot_get_p                        110 10.4    0.001    0.001    8.385    8.422
 sum_up_and_integrate               110 10.3    0.079    0.081    7.774    7.787
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.745    7.762
 calculate_rho_elec                 110  8.6    0.115    0.115    7.745    7.762
 integrate_v_rspace                 110 11.3    0.003    0.003    7.694    7.706
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    6.340    6.354
 make_m2s                          4110 13.4    0.039    0.041    5.699    6.019
 init_scf_run                        11  5.9    0.000    0.001    5.995    5.995
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    5.995    5.995
 make_images                       4110 14.4    0.638    0.691    5.567    5.887
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    5.866    5.867
 dbcsr_mm_accdrv_process          17442 15.9    2.964    3.750    4.278    5.247
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    5.014    5.022
 cp_fm_diag_elpa_base                48 14.0    4.919    4.973    5.011    5.019
 pw_transfer                       1331 11.6    0.066    0.072    4.956    4.989
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.009    4.848    4.884
 density_rs2pw                      110  9.6    0.004    0.004    4.226    4.605
 cp_fm_cholesky_invert               11 10.9    4.545    4.551    4.545    4.551
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.018    2.775    4.167
 make_images_data                  4110 15.4    0.038    0.043    3.143    4.004
 fft_wrap_pw1pw2_140                451 13.1    0.830    0.844    3.962    4.002
 ot_diis_step                        99 11.5    0.012    0.012    3.872    3.872
 rs_pw_transfer                     902 11.9    0.010    0.010    3.486    3.823
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.781    3.803
 apply_single                       110 13.6    0.000    0.000    3.781    3.802
 hybrid_alltoall_any               4261 16.3    0.201    0.866    2.885    3.727
 grid_integrate_task_list           110 12.3    3.406    3.598    3.406    3.598
 fft3d_ps                          1111 14.6    1.148    1.168    3.396    3.436
 wfi_extrapolate                     11  7.9    0.001    0.001    3.415    3.415
 potential_pw2rs                    110 12.3    0.015    0.016    3.043    3.077
 multiply_cannon_sync_h2d          8220 15.4    2.893    3.014    2.893    3.014
 multiply_cannon_metrocomm1        8220 15.4    0.021    0.022    2.108    2.922
 calculate_dm_sparse                110  9.5    0.001    0.001    2.791    2.818
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.762    2.765
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.740    2.760
 cp_fm_cholesky_decompose            22 10.9    2.702    2.716    2.702    2.716
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    2.694    2.695
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.657    2.680
 mp_alltoall_d11v                  2046 13.8    2.430    2.677    2.430    2.677
 grid_collocate_task_list           110  9.6    2.324    2.590    2.324    2.590
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.451    2.453
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.312    2.321
 mp_allgather_i34                  2055 14.4    0.819    2.138    0.819    2.138
 mp_alltoall_z22v                  1111 16.6    2.007    2.047    2.007    2.047
 dbcsr_complete_redistribute        325 12.2    0.545    0.597    1.897    2.017
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.766    1.988
 make_images_sizes                 4110 15.4    0.005    0.005    1.136    1.987
 mp_alltoall_i44                   4110 16.4    1.131    1.983    1.131    1.983
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.887    1.891
 mp_waitany                        9240 13.8    1.317    1.756    1.317    1.756
 mp_sum_l                          6594 12.7    1.173    1.660    1.173    1.660
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    1.532    1.656
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.565    1.574
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.371    1.389
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=69.296000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=791.909091, yerr=9.829959
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.361658E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67104.
 MP_Allreduce         9752                    812.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.026    0.043  100.799  100.800
 qs_mol_dyn_low                       1  2.0    0.003    0.003  100.432  100.442
 qs_forces                           11  3.9    0.003    0.003  100.349  100.350
 qs_energies                         11  4.9    0.002    0.002   95.919   95.921
 scf_env_do_scf                      11  5.9    0.001    0.001   85.228   85.229
 velocity_verlet                     10  3.0    0.001    0.001   62.395   62.402
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   53.281   53.283
 init_scf_loop                       11  6.9    0.000    0.000   31.861   31.866
 qs_scf_new_mos                      99  7.5    0.001    0.001   31.803   31.866
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   31.802   31.865
 dbcsr_multiply_generic            2055 12.4    0.118    0.123   31.273   31.357
 ot_scf_mini                         99  9.5    0.002    0.002   29.910   29.952
 prepare_preconditioner              11  7.9    0.000    0.000   29.607   29.618
 make_preconditioner                 11  8.9    0.000    0.000   29.607   29.618
 make_full_inverse_cholesky          11  9.9    0.000    0.000   23.467   29.024
 multiply_cannon                   2055 13.4    0.346    0.362   22.072   23.067
 multiply_cannon_loop              2055 14.4    0.342    0.346   20.072   20.406
 cp_fm_upper_to_full                 70 14.2   13.066   18.948   13.066   18.948
 rebuild_ks_matrix                  110  8.3    0.000    0.000   16.020   16.067
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.013   16.020   16.067
 ot_mini                             99 10.5    0.001    0.001   15.560   15.604
 qs_ks_update_qs_env                110  7.6    0.001    0.001   14.516   14.558
 dbcsr_complete_redistribute        325 12.2    1.013    1.039    8.035   11.479
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.817   10.875
 mp_waitall_1                     84994 16.7    9.655   10.741    9.655   10.741
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.848   10.280
 multiply_cannon_multrec           8220 15.4    4.369    4.514    9.600    9.712
 qs_rho_update_rho_low              110  7.6    0.001    0.001    9.508    9.550
 calculate_rho_elec                 110  8.6    0.226    0.227    9.508    9.550
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    6.125    9.512
 mp_alltoall_i22                    605 13.7    5.736    9.187    5.736    9.187
 qs_ot_get_p                        110 10.4    0.001    0.001    8.774    8.827
 sum_up_and_integrate               110 10.3    0.150    0.151    8.751    8.768
 integrate_v_rspace                 110 11.3    0.003    0.003    8.601    8.618
 make_m2s                          4110 13.4    0.043    0.044    7.088    7.689
 make_images                       4110 14.4    0.882    0.928    6.895    7.496
 cp_fm_cholesky_invert               11 10.9    6.985    6.991    6.985    6.991
 qs_ot_p2m_diag                      48 11.0    0.151    0.156    6.723    6.729
 pw_transfer                       1331 11.6    0.075    0.076    6.616    6.622
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.009    6.498    6.506
 init_scf_run                        11  5.9    0.000    0.001    6.286    6.287
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    6.286    6.286
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    6.178    6.179
 fft_wrap_pw1pw2_140                451 13.1    1.337    1.346    5.414    5.424
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.019    4.994    5.346
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.819    5.334
 apply_single                       110 13.6    0.000    0.000    4.819    5.334
 dbcsr_mm_accdrv_process          11614 15.7    3.342    3.652    5.089    5.311
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    5.293    5.293
 cp_fm_diag_elpa_base                48 14.0    4.660    4.951    5.290    5.290
 density_rs2pw                      110  9.6    0.004    0.004    5.206    5.244
 make_images_data                  4110 15.4    0.041    0.044    4.083    5.166
 hybrid_alltoall_any               4261 16.3    0.256    0.552    3.896    4.907
 ot_diis_step                        99 11.5    0.015    0.016    4.696    4.697
 fft3d_ps                          1111 14.6    1.312    1.322    4.429    4.441
 multiply_cannon_sync_h2d          8220 15.4    3.949    3.952    3.949    3.952
 wfi_extrapolate                     11  7.9    0.001    0.001    3.827    3.827
 grid_integrate_task_list           110 12.3    3.713    3.777    3.713    3.777
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.273    3.757
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.690    3.691
 rs_pw_transfer                     902 11.9    0.010    0.011    3.587    3.616
 potential_pw2rs                    110 12.3    0.021    0.022    3.520    3.526
 calculate_dm_sparse                110  9.5    0.001    0.001    3.287    3.313
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.155    3.192
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.995    2.997
 mp_alltoall_d11v                  2046 13.8    2.886    2.988    2.886    2.988
 cp_fm_cholesky_decompose            22 10.9    2.934    2.953    2.934    2.953
 mp_alltoall_z22v                  1111 16.6    2.863    2.876    2.863    2.876
 grid_collocate_task_list           110  9.6    2.701    2.737    2.701    2.737
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.404    2.421
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.302    2.303
 qs_env_update_s_mstruct             11  6.9    0.001    0.002    2.211    2.268
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.105    2.184
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    2.113    2.117
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    2.010    2.039
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=100.800000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1226.090909, yerr=50.057157
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             632.725504E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175955383376
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4002                  57767.
 MP_Allreduce        11084                    796.
 MP_Sync                87
 MP_Alltoall          2226                1802496.
 MP_SendRecv         24320                  18752.
 MP_ISendRecv        24320                  18752.
 MP_Wait             42476
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.015    0.030  212.551  212.552
 qs_mol_dyn_low                       1  2.0    0.003    0.003  212.085  212.099
 qs_forces                           11  3.9    0.005    0.005  211.996  211.998
 qs_energies                         11  4.9    0.002    0.003  206.247  206.266
 scf_env_do_scf                      11  5.9    0.001    0.002  189.516  189.519
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  168.137  168.139
 qs_scf_new_mos                     117  7.6    0.001    0.001  126.672  126.941
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  126.671  126.940
 velocity_verlet                     10  3.0    0.001    0.001  126.913  126.914
 dbcsr_multiply_generic            2507 12.6    0.177    0.180  125.251  125.812
 ot_scf_mini                        117  9.6    0.003    0.003  120.032  120.304
 multiply_cannon                   2507 13.6    0.239    0.246  101.213  102.669
 multiply_cannon_loop              2507 14.6    2.090    2.134   98.939  100.204
 ot_mini                            117 10.6    0.001    0.001   66.394   66.655
 multiply_cannon_multrec          60168 15.6   33.228   34.699   41.544   43.112
 qs_ot_get_derivative               117 11.6    0.001    0.001   41.623   41.874
 rebuild_ks_matrix                  128  8.3    0.001    0.001   34.477   34.718
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.017   34.476   34.717
 mp_waitall_1                    267128 16.5   28.984   32.542   28.984   32.542
 qs_ot_get_p                        128 10.4    0.001    0.001   31.603   31.863
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.918   31.160
 multiply_cannon_sync_h2d         60168 15.6   27.465   28.879   27.465   28.879
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.330   25.125
 apply_single                       128 13.6    0.001    0.001   24.330   25.125
 qs_ot_p2m_diag                      83 11.4    0.079    0.091   24.858   24.915
 ot_diis_step                       117 11.6    0.008    0.008   24.540   24.541
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   22.299   22.300
 init_scf_loop                       11  6.9    0.000    0.001   21.290   21.291
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   19.956   20.195
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   19.117   19.153
 cp_fm_diag_elpa_base                83 14.4   19.034   19.071   19.112   19.146
 multiply_cannon_metrocomm3       60168 15.6    0.111    0.115   15.789   17.751
 prepare_preconditioner              11  7.9    0.000    0.000   16.605   16.646
 make_preconditioner                 11  8.9    0.000    0.000   16.605   16.646
 make_full_inverse_cholesky          11  9.9    0.000    0.000   15.798   16.000
 sum_up_and_integrate               128 10.3    0.090    0.109   14.812   14.830
 integrate_v_rspace                 128 11.3    0.004    0.005   14.722   14.743
 qs_rho_update_rho_low              128  7.7    0.001    0.001   14.621   14.741
 calculate_rho_elec                 128  8.7    0.046    0.065   14.620   14.740
 make_m2s                          5014 13.6    0.105    0.112   14.255   14.676
 make_images                       5014 14.6    0.404    0.423   14.076   14.505
 init_scf_run                        11  5.9    0.000    0.001   12.574   12.574
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   12.573   12.574
 density_rs2pw                      128  9.7    0.006    0.007    7.806   11.092
 rs_pw_transfer                    1046 11.9    0.017    0.018    6.512    9.865
 mp_sum_l                          7950 12.9    8.522    9.504    8.522    9.504
 cp_fm_cholesky_invert               11 10.9    9.473    9.481    9.473    9.481
 wfi_extrapolate                     11  7.9    0.001    0.001    9.260    9.260
 calculate_dm_sparse                128  9.5    0.001    0.001    8.516    8.597
 pw_transfer                       1547 11.6    0.075    0.093    8.266    8.458
 multiply_cannon_metrocomm1       60168 15.6    0.093    0.099    6.122    8.457
 dbcsr_mm_accdrv_process         124484 16.2    3.202    3.329    7.879    8.392
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.014    8.062    8.247
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    8.012    8.182
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    7.924    8.005
 make_images_data                  5014 15.6    0.067    0.073    7.021    7.832
 grid_integrate_task_list           128 12.3    7.065    7.645    7.065    7.645
 hybrid_alltoall_any               5200 16.5    0.291    2.276    6.130    7.281
 fft_wrap_pw1pw2_140                523 13.2    1.275    1.318    6.855    7.043
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.679    6.691
 fft3d_ps                          1291 14.7    2.140    2.776    5.824    6.154
 mp_waitany                       16020 13.9    2.698    6.095    2.698    6.095
 grid_collocate_task_list           128  9.7    4.688    5.939    4.688    5.939
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.759    5.873
 mp_alltoall_d11v                  2415 14.1    4.570    5.752    4.570    5.752
 rs_pw_transfer_RS2PW_140           139 11.5    0.281    0.293    2.151    5.450
 potential_pw2rs                    128 12.3    0.009    0.011    5.078    5.129
 cp_fm_cholesky_decompose            22 10.9    4.911    4.924    4.911    4.924
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=212.552000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=598.090909, yerr=6.707588
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410023282688       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444707676160       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796579E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.166472E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705500928       0.0%      0.0%    100.0%
 number of processed stacks               5925696       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1131.6
 marketing flops                   143.508480E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank             830.775296E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2385600
 MPI messages size (bytes):
  total size                         4.069300E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.705776E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               70188               2295595008
     32768 < size <=   131072              716032              54973693952
    131072 < size <=  4194304             1363760            1386318135296
   4194304 < size <= 16777216              153648            1453843137296
  16777216 < size                           67056            1171888537600
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3997                  58290.
 MP_Allreduce        11071                    960.
 MP_Sync                86
 MP_Alltoall          1955                5616340.
 MP_SendRecv         11938                  47072.
 MP_ISendRecv        11938                  47072.
 MP_Wait             25718
 MP_ISend            11660                 212488.
 MP_IRecv            11660                 212488.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.030  208.952  208.953
 qs_mol_dyn_low                       1  2.0    0.003    0.005  208.522  208.536
 qs_forces                           11  3.9    0.004    0.005  208.365  208.367
 qs_energies                         11  4.9    0.002    0.006  201.212  201.222
 scf_env_do_scf                      11  5.9    0.001    0.001  183.998  184.008
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  146.759  146.761
 velocity_verlet                     10  3.0    0.001    0.002  129.779  129.782
 qs_scf_new_mos                     116  7.6    0.001    0.001  103.151  103.700
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001  103.151  103.699
 dbcsr_multiply_generic            2485 12.5    0.187    0.193   98.851   99.959
 ot_scf_mini                        116  9.6    0.004    0.004   98.313   98.922
 multiply_cannon                   2485 13.5    0.472    0.528   77.396   81.199
 multiply_cannon_loop              2485 14.5    1.235    1.274   73.908   76.323
 ot_mini                            116 10.6    0.001    0.001   51.051   51.594
 mp_waitall_1                    212858 16.6   26.310   39.293   26.310   39.293
 init_scf_loop                       11  6.9    0.000    0.000   37.137   37.139
 multiply_cannon_multrec          29820 15.5   22.087   26.660   31.654   36.482
 rebuild_ks_matrix                  127  8.3    0.001    0.001   35.462   35.800
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.019   35.461   35.799
 prepare_preconditioner              11  7.9    0.000    0.000   32.464   32.542
 make_preconditioner                 11  8.9    0.000    0.000   32.464   32.542
 qs_ks_update_qs_env                127  7.6    0.001    0.001   31.829   32.145
 make_full_inverse_cholesky          11  9.9    0.000    0.000   30.990   31.605
 qs_ot_get_derivative               116 11.6    0.001    0.002   29.257   29.860
 qs_ot_get_p                        127 10.4    0.001    0.001   28.831   29.414
 multiply_cannon_metrocomm3       29820 15.5    0.090    0.094   15.334   27.634
 qs_ot_p2m_diag                      82 11.4    0.185    0.213   23.432   23.471
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   21.866   23.020
 apply_single                       127 13.6    0.001    0.001   21.865   23.020
 cp_dbcsr_syevd                      82 12.4    0.006    0.006   22.130   22.134
 multiply_cannon_sync_h2d         29820 15.5   19.398   21.825   19.398   21.825
 ot_diis_step                       116 11.6    0.014    0.015   21.619   21.623
 cp_fm_cholesky_invert               11 10.9   18.509   18.522   18.509   18.522
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   18.257   18.286
 cp_fm_diag_elpa_base                82 14.4   17.948   18.075   18.250   18.278
 make_m2s                          4970 13.5    0.090    0.095   15.702   17.232
 make_images                       4970 14.5    1.160    1.349   15.494   17.021
 sum_up_and_integrate               127 10.3    0.116    0.132   16.706   16.737
 integrate_v_rspace                 127 11.3    0.004    0.004   16.590   16.626
 qs_rho_update_rho_low              127  7.7    0.001    0.002   16.146   16.185
 calculate_rho_elec                 127  8.7    0.088    0.105   16.145   16.184
 init_scf_run                        11  5.9    0.000    0.001   12.273   12.274
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   12.272   12.274
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002   11.646   12.088
 density_rs2pw                      127  9.7    0.007    0.007    9.403   11.795
 make_images_data                  4970 15.5    0.064    0.073    9.282   11.169
 rs_pw_transfer                    1038 11.9    0.014    0.017    8.753   11.028
 multiply_cannon_metrocomm4       27335 15.5    0.101    0.115    3.617   10.269
 hybrid_alltoall_any               5155 16.4    0.341    1.495    7.821   10.241
 mp_irecv_dv                      68888 16.3    3.422    9.884    3.422    9.884
 pw_transfer                       1535 11.6    0.087    0.094    9.720    9.839
 fft_wrap_pw1pw2                   1281 12.7    0.011    0.012    9.496    9.615
 dbcsr_mm_accdrv_process          61726 16.2    4.429    5.122    9.021    9.603
 wfi_extrapolate                     11  7.9    0.001    0.001    9.109    9.111
 cp_fm_cholesky_decompose            22 10.9    8.870    8.948    8.870    8.948
 fft_wrap_pw1pw2_140                519 13.2    1.321    1.344    7.921    8.063
 grid_integrate_task_list           127 12.3    7.071    7.748    7.071    7.748
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    6.457    7.216
 potential_pw2rs                    127 12.3    0.016    0.019    6.869    6.953
 fft3d_ps                          1281 14.7    2.793    2.966    6.825    6.922
 calculate_dm_sparse                127  9.5    0.001    0.001    6.509    6.660
 mp_alltoall_d11v                  2401 14.1    4.723    6.286    4.723    6.286
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.266    6.276
 grid_collocate_task_list           127  9.7    4.740    5.919    4.740    5.919
 mp_sum_l                          7884 12.9    3.988    5.916    3.988    5.916
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    5.443    5.516
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.311    5.429
 mp_waitany                       11660 13.9    2.750    5.252    2.750    5.252
 mp_allgather_i34                  2485 14.5    2.053    4.972    2.053    4.972
 dbcsr_complete_redistribute        393 12.7    0.773    0.855    3.654    4.623
 rs_pw_transfer_RS2PW_140           138 11.5    0.349    0.372    2.155    4.507
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=208.953000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=791.909091, yerr=2.274545
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.928533E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               3984192       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1695.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             948.596736E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1042912
 MPI messages size (bytes):
  total size                         2.716210E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.604448E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              281856              36943429632
    131072 < size <=  4194304              660064             996105256960
   4194304 < size <= 16777216               65632             931530938576
  16777216 < size                           28672             751619276800
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  58357.
 MP_Allreduce        11057                   1000.
 MP_Sync                87
 MP_Alltoall          1712                9388896.
 MP_SendRecv          7936                  75008.
 MP_ISendRecv         7936                  75008.
 MP_Wait             21820
 MP_ISend            11748                 275205.
 MP_IRecv            11748                 275205.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.031  212.505  212.506
 qs_mol_dyn_low                       1  2.0    0.003    0.003  212.049  212.063
 qs_forces                           11  3.9    0.004    0.004  211.935  211.941
 qs_energies                         11  4.9    0.001    0.002  204.968  204.986
 scf_env_do_scf                      11  5.9    0.001    0.001  188.160  188.160
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  143.762  143.764
 velocity_verlet                     10  3.0    0.001    0.002  134.163  134.166
 qs_scf_new_mos                     117  7.6    0.001    0.001  101.251  101.711
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  101.251  101.711
 ot_scf_mini                        117  9.6    0.003    0.004   96.721   97.236
 dbcsr_multiply_generic            2507 12.6    0.179    0.185   88.861   90.611
 multiply_cannon                   2507 13.6    0.499    0.524   64.719   68.623
 multiply_cannon_loop              2507 14.6    0.861    0.892   60.715   65.575
 ot_mini                            117 10.6    0.001    0.001   48.314   48.879
 init_scf_loop                       11  6.9    0.000    0.000   44.267   44.269
 mp_waitall_1                    170520 16.6   30.253   42.724   30.253   42.724
 prepare_preconditioner              11  7.9    0.000    0.000   39.940   39.998
 make_preconditioner                 11  8.9    0.000    0.000   39.940   39.998
 make_full_inverse_cholesky          11  9.9    0.000    0.000   37.335   38.897
 rebuild_ks_matrix                  128  8.3    0.001    0.001   33.331   33.949
 qs_ks_build_kohn_sham_matrix       128  9.3    0.016    0.018   33.330   33.948
 qs_ot_get_p                        128 10.4    0.001    0.001   32.776   33.463
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.028   30.594
 multiply_cannon_metrocomm3       20056 15.6    0.058    0.063   17.590   29.748
 qs_ot_get_derivative               117 11.6    0.001    0.002   27.750   28.259
 qs_ot_p2m_diag                      83 11.4    0.266    0.273   27.212   27.275
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   25.776   25.782
 multiply_cannon_multrec          20056 15.6   13.179   16.598   21.803   25.249
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   20.325   21.651
 apply_single                       128 13.6    0.001    0.001   20.325   21.651
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   21.283   21.292
 cp_fm_diag_elpa_base                83 14.4   20.781   20.989   21.275   21.285
 ot_diis_step                       117 11.6    0.018    0.021   20.341   20.342
 cp_fm_cholesky_invert               11 10.9   20.162   20.176   20.162   20.176
 make_m2s                          5014 13.6    0.080    0.083   17.107   18.292
 make_images                       5014 14.6    1.191    1.284   16.876   18.063
 qs_rho_update_rho_low              128  7.7    0.001    0.001   16.627   16.724
 calculate_rho_elec                 128  8.7    0.132    0.148   16.627   16.724
 sum_up_and_integrate               128 10.3    0.134    0.147   16.675   16.702
 integrate_v_rspace                 128 11.3    0.004    0.004   16.541   16.570
 multiply_cannon_sync_h2d         20056 15.6   13.978   15.858   13.978   15.858
 make_images_data                  5014 15.6    0.059    0.067   10.466   12.276
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   11.544   11.936
 init_scf_run                        11  5.9    0.000    0.001   11.536   11.536
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   11.536   11.536
 density_rs2pw                      128  9.7    0.006    0.007    9.158   11.175
 hybrid_alltoall_any               5200 16.5    0.437    2.006    8.906   10.890
 cp_fm_cholesky_decompose            22 10.9   10.217   10.236   10.217   10.236
 pw_transfer                       1547 11.6    0.088    0.104    9.956   10.123
 rs_pw_transfer                    1046 11.9    0.013    0.016    8.005   10.115
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.012    9.730    9.901
 multiply_cannon_metrocomm4       17549 15.6    0.061    0.071    3.475    9.753
 mp_irecv_dv                      50230 16.2    3.357    9.498    3.357    9.498
 wfi_extrapolate                     11  7.9    0.001    0.001    8.424    8.425
 dbcsr_mm_accdrv_process          41502 16.2    4.592    5.253    8.135    8.248
 fft_wrap_pw1pw2_140                523 13.2    1.412    1.445    8.029    8.214
 cp_fm_upper_to_full                105 14.8    6.077    7.819    6.077    7.819
 grid_integrate_task_list           128 12.3    7.222    7.753    7.222    7.753
 mp_sum_l                          7950 12.9    5.239    7.592    5.239    7.592
 dbcsr_complete_redistribute        395 12.7    1.172    1.203    5.309    7.385
 fft3d_ps                          1291 14.7    2.742    2.974    6.851    6.976
 potential_pw2rs                    128 12.3    0.021    0.024    6.580    6.644
 mp_alltoall_d11v                  2415 14.1    5.251    6.491    5.251    6.491
 calculate_dm_sparse                128  9.5    0.001    0.001    6.155    6.232
 mp_allgather_i34                  2507 14.6    2.504    6.182    2.504    6.182
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    5.338    6.075
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.819    5.893
 grid_collocate_task_list           128  9.7    4.992    5.765    4.992    5.765
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.652    5.661
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    4.704    4.754
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.592    4.752
 mp_waitany                       11748 13.9    2.716    4.748    2.716    4.748
 mp_alltoall_i22                    716 14.1    2.348    4.627    2.348    4.627
 transfer_fm_to_dbcsr                11  9.9    0.020    0.027    2.586    4.568
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=212.506000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=900.636364, yerr=7.713892
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430456039424       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1962800054272       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986255912960       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992003932160       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613072052224       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239176077312       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239176077312       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.233020E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.387242E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806383904       0.0%      0.0%    100.0%
 number of processed stacks               6026880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1129.3
 marketing flops                   145.651870E+12
 -------------------------------------------------------------------------------
 # multiplications                           2529
 max memory usage/rank               1.140679E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1153224
 MPI messages size (bytes):
  total size                         2.039489E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.768511E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              322096              36390305792
    131072 < size <=  4194304              721976             792118951936
   4194304 < size <= 16777216               70800             669922227920
  16777216 < size                           30960             541065216000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4043                  57632.
 MP_Allreduce        11184                   1079.
 MP_Sync                88
 MP_Alltoall          1724               12509627.
 MP_SendRecv          5934                  75008.
 MP_ISendRecv         5934                  75008.
 MP_Wait             22612
 MP_ISend            15064                 244788.
 MP_IRecv            15064                 244788.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.020    0.036  232.168  232.168
 qs_mol_dyn_low                       1  2.0    0.003    0.003  231.736  231.751
 qs_forces                           11  3.9    0.004    0.005  231.606  231.608
 qs_energies                         11  4.9    0.002    0.002  223.997  224.008
 scf_env_do_scf                      11  5.9    0.001    0.013  205.421  205.428
 velocity_verlet                     10  3.0    0.002    0.008  155.492  155.495
 scf_env_do_scf_inner_loop          118  6.6    0.003    0.008  144.666  144.669
 qs_scf_new_mos                     118  7.6    0.001    0.001  102.868  103.215
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001  102.867  103.215
 ot_scf_mini                        118  9.6    0.003    0.004   97.981   98.273
 dbcsr_multiply_generic            2529 12.6    0.186    0.194   86.748   87.586
 multiply_cannon                   2529 13.6    0.557    0.605   58.233   61.125
 init_scf_loop                       11  6.9    0.000    0.000   60.579   60.584
 prepare_preconditioner              11  7.9    0.000    0.000   56.064   56.088
 make_preconditioner                 11  8.9    0.000    0.000   56.064   56.088
 multiply_cannon_loop              2529 14.6    1.196    1.220   53.052   55.224
 make_full_inverse_cholesky          11  9.9    0.011    0.023   49.284   54.629
 ot_mini                            118 10.6    0.001    0.001   48.146   48.470
 qs_ot_get_p                        129 10.4    0.001    0.001   35.040   35.379
 mp_waitall_1                    149172 16.7   21.969   34.206   21.969   34.206
 rebuild_ks_matrix                  129  8.3    0.001    0.001   32.435   32.747
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.020   32.434   32.747
 multiply_cannon_multrec          30348 15.6   13.814   18.851   26.199   31.104
 qs_ot_p2m_diag                      84 11.4    0.347    0.395   29.782   29.843
 qs_ks_update_qs_env                129  7.6    0.001    0.001   29.192   29.474
 cp_dbcsr_syevd                      84 12.4    0.005    0.006   28.223   28.228
 qs_ot_get_derivative               118 11.6    0.001    0.002   27.250   27.539
 make_m2s                          5058 13.6    0.097    0.100   23.183   25.296
 make_images                       5058 14.6    1.973    2.289   22.874   24.985
 cp_fm_cholesky_invert               11 10.9   24.449   24.471   24.449   24.471
 cp_fm_diag_elpa                     84 13.4    0.000    0.001   23.875   23.887
 cp_fm_diag_elpa_base                84 14.4   22.726   23.124   23.866   23.877
 apply_preconditioner_dbcsr         129 12.6    0.000    0.001   20.152   20.746
 apply_single                       129 13.6    0.001    0.001   20.152   20.746
 ot_diis_step                       118 11.6    0.018    0.021   20.730   20.733
 cp_fm_upper_to_full                106 14.8   12.182   18.022   12.182   18.022
 multiply_cannon_metrocomm3       30348 15.6    0.048    0.051    7.850   17.709
 qs_rho_update_rho_low              129  7.7    0.001    0.001   17.128   17.157
 calculate_rho_elec                 129  8.7    0.177    0.194   17.127   17.156
 sum_up_and_integrate               129 10.3    0.141    0.153   16.707   16.731
 integrate_v_rspace                 129 11.3    0.004    0.004   16.565   16.595
 make_images_data                  5058 15.6    0.062    0.068   12.885   15.903
 dbcsr_complete_redistribute        397 12.7    1.521    1.667   10.082   14.112
 hybrid_alltoall_any               5245 16.5    0.534    2.225   11.325   13.857
 multiply_cannon_sync_h2d         30348 15.6   11.705   12.955   11.705   12.955
 dbcsr_mm_accdrv_process          62780 16.2    7.591    8.690   11.957   12.486
 init_scf_run                        11  5.9    0.000    0.001   12.387   12.388
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   12.387   12.388
 copy_fm_to_dbcsr                   210 11.7    0.001    0.002    8.329   12.351
 cp_fm_cholesky_decompose            22 10.9   12.220   12.320   12.220   12.320
 qs_ot_get_derivative_diag           78 12.4    0.002    0.002   11.525   11.744
 transfer_fm_to_dbcsr                11  9.9    0.001    0.006    6.757   10.712
 pw_transfer                       1559 11.6    0.088    0.102   10.487   10.578
 fft_wrap_pw1pw2                   1301 12.7    0.011    0.012   10.257   10.354
 density_rs2pw                      129  9.7    0.006    0.006    8.842   10.250
 mp_alltoall_i22                    720 14.1    6.081   10.121    6.081   10.121
 wfi_extrapolate                     11  7.9    0.001    0.001    8.909    8.909
 fft_wrap_pw1pw2_140                527 13.2    1.569    1.611    8.734    8.833
 rs_pw_transfer                    1054 12.0    0.014    0.021    7.024    8.523
 grid_integrate_task_list           129 12.3    7.524    8.007    7.524    8.007
 multiply_cannon_metrocomm4       25290 15.6    0.080    0.093    2.858    7.810
 mp_irecv_dv                      76751 16.2    2.713    7.536    2.713    7.536
 mp_alltoall_d11v                  2429 14.1    6.580    7.531    6.580    7.531
 fft3d_ps                          1301 14.7    2.864    2.955    7.158    7.215
 calculate_dm_sparse                129  9.5    0.001    0.001    6.750    6.841
 potential_pw2rs                    129 12.3    0.023    0.024    6.150    6.184
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.903    5.956
 grid_collocate_task_list           129  9.7    5.195    5.883    5.195    5.883
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    5.216    5.295
 mp_allgather_i34                  2529 14.6    3.257    4.824    3.257    4.824
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    4.718    4.786
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.591    4.680
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=232.168000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1077.636364, yerr=17.546161
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410024443904       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444712984576       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796586E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.820059E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705502176       0.0%      0.0%    100.0%
 number of processed stacks               1944496       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3448.5
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               1.527034E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  238560
 MPI messages size (bytes):
  total size                         1.321104E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.537828E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              112800              59139686400
   4194304 < size <= 16777216              104112             545846722560
  16777216 < size                           20064             716108700816
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8852                     52.
 MP_Alltoall          9584                 804353.
 MP_ISend            39716                2104723.
 MP_IRecv            39716                2103824.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4003                  58193.
 MP_Allreduce        11085                   1167.
 MP_Sync                86
 MP_Alltoall          1700               18828148.
 MP_SendRecv          3810                 122880.
 MP_ISendRecv         3810                 122880.
 MP_Wait             16000
 MP_ISend            10600                 423612.
 MP_IRecv            10600                 423612.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.032  198.966  198.967
 qs_mol_dyn_low                       1  2.0    0.003    0.003  198.450  198.464
 qs_forces                           11  3.9    0.004    0.004  198.332  198.337
 qs_energies                         11  4.9    0.002    0.002  190.443  190.454
 scf_env_do_scf                      11  5.9    0.001    0.002  171.784  171.788
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  129.489  129.491
 velocity_verlet                     10  3.0    0.001    0.001  128.045  128.050
 qs_scf_new_mos                     116  7.6    0.001    0.001   87.875   87.983
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   87.874   87.982
 ot_scf_mini                        116  9.6    0.003    0.004   83.283   83.356
 dbcsr_multiply_generic            2485 12.5    0.179    0.190   77.922   78.443
 multiply_cannon                   2485 13.5    0.584    0.626   55.533   61.173
 multiply_cannon_loop              2485 14.5    0.442    0.454   50.609   51.445
 ot_mini                            116 10.6    0.001    0.001   42.311   42.381
 init_scf_loop                       11  6.9    0.000    0.000   42.115   42.118
 prepare_preconditioner              11  7.9    0.000    0.000   37.929   37.951
 make_preconditioner                 11  8.9    0.000    0.000   37.929   37.951
 make_full_inverse_cholesky          11  9.9    0.016    0.025   35.622   36.031
 mp_waitall_1                    124680 16.7   28.672   35.919   28.672   35.919
 rebuild_ks_matrix                  127  8.3    0.001    0.001   31.659   31.772
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.018   31.659   31.772
 qs_ks_update_qs_env                127  7.6    0.001    0.001   28.666   28.770
 qs_ot_get_p                        127 10.4    0.001    0.001   27.780   27.910
 qs_ot_p2m_diag                      82 11.4    0.489    0.495   23.197   23.227
 qs_ot_get_derivative               116 11.6    0.002    0.002   22.434   22.514
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   21.829   21.834
 cp_fm_cholesky_invert               11 10.9   21.623   21.630   21.623   21.630
 multiply_cannon_multrec           9940 15.5   10.271   14.101   17.765   20.888
 make_m2s                          4970 13.5    0.067    0.073   18.296   20.659
 make_images                       4970 14.5    2.292    2.642   17.989   20.345
 ot_diis_step                       116 11.6    0.020    0.020   19.806   19.806
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   19.405   19.726
 apply_single                       127 13.6    0.001    0.001   19.405   19.725
 multiply_cannon_metrocomm3        9940 15.5    0.023    0.025   11.721   18.706
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   17.986   17.995
 cp_fm_diag_elpa_base                82 14.4   17.658   17.809   17.974   17.983
 qs_rho_update_rho_low              127  7.7    0.001    0.001   17.142   17.184
 calculate_rho_elec                 127  8.7    0.257    0.267   17.142   17.184
 sum_up_and_integrate               127 10.3    0.178    0.189   16.465   16.508
 integrate_v_rspace                 127 11.3    0.004    0.005   16.286   16.339
 make_images_data                  4970 15.5    0.050    0.060   11.029   13.798
 hybrid_alltoall_any               5155 16.4    0.837    3.815   10.556   13.404
 multiply_cannon_sync_h2d          9940 15.5   11.478   11.943   11.478   11.943
 init_scf_run                        11  5.9    0.000    0.001   11.587   11.587
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   11.587   11.587
 pw_transfer                       1535 11.6    0.086    0.094   10.740   10.802
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011   10.517   10.583
 cp_fm_cholesky_decompose            22 10.9   10.325   10.446   10.325   10.446
 density_rs2pw                      127  9.7    0.006    0.006    8.355    9.256
 multiply_cannon_metrocomm1        9940 15.5    0.029    0.029    5.985    9.252
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    9.135    9.187
 fft_wrap_pw1pw2_140                519 13.2    1.897    1.930    8.981    9.046
 wfi_extrapolate                     11  7.9    0.001    0.001    8.331    8.331
 grid_integrate_task_list           127 12.3    7.722    8.261    7.722    8.261
 mp_allgather_i34                  2485 14.5    3.115    8.203    3.115    8.203
 dbcsr_mm_accdrv_process          20590 16.1    2.631    3.452    7.132    7.807
 mp_alltoall_d11v                  2401 14.1    6.241    7.646    6.241    7.646
 fft3d_ps                          1281 14.7    2.742    2.810    7.018    7.080
 rs_pw_transfer                    1038 11.9    0.012    0.013    5.716    6.685
 calculate_dm_sparse                127  9.5    0.001    0.001    6.480    6.578
 dbcsr_complete_redistribute        393 12.7    2.111    2.174    5.870    6.386
 grid_collocate_task_list           127  9.7    5.425    6.100    5.425    6.100
 potential_pw2rs                    127 12.3    0.027    0.027    5.582    5.637
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.455    5.473
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    5.324    5.325
 multiply_cannon_metrocomm4        7455 15.5    0.023    0.026    1.861    4.280
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.209    4.239
 copy_fm_to_dbcsr                   208 11.6    0.001    0.002    3.813    4.220
 mp_irecv_dv                      28618 15.9    1.830    4.210    1.830    4.210
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.009    4.035
 copy_dbcsr_to_fm                   185 11.7    0.004    0.004    3.911    4.020
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=198.967000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1430.545455, yerr=50.046094
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.696234E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks               1964048       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3439.8
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               3.115741E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  100280
 MPI messages size (bytes):
  total size                         1.136195E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.330227E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               45208              35089547264
   4194304 < size <= 16777216               44352             379752284160
  16777216 < size                           10104             721350232272
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4020                  58886.
 MP_Allreduce        11127                   1502.
 MP_Sync                87
 MP_Alltoall          1712               36974159.
 MP_SendRecv          1792                 218624.
 MP_ISendRecv         1792                 218624.
 MP_Wait              9802
 MP_ISend             6408                1080322.
 MP_IRecv             6408                1080322.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.026    0.044  327.116  327.116
 qs_mol_dyn_low                       1  2.0    0.003    0.003  326.465  326.478
 qs_forces                           11  3.9    0.004    0.004  326.145  326.148
 qs_energies                         11  4.9    0.002    0.002  316.291  316.302
 scf_env_do_scf                      11  5.9    0.001    0.002  291.601  291.610
 velocity_verlet                     10  3.0    0.001    0.001  233.665  233.701
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  157.003  157.006
 init_scf_loop                       11  6.9    0.000    0.000  134.303  134.309
 prepare_preconditioner              11  7.9    0.000    0.000  129.019  129.053
 make_preconditioner                 11  8.9    0.000    0.000  129.019  129.053
 make_full_inverse_cholesky          11  9.9    0.035    0.039  104.423  126.010
 qs_scf_new_mos                     117  7.6    0.001    0.001  101.911  102.068
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  101.910  102.067
 ot_scf_mini                        117  9.6    0.004    0.004   96.984   97.040
 dbcsr_multiply_generic            2507 12.6    0.210    0.221   88.018   88.595
 cp_fm_upper_to_full                105 14.8   54.060   77.648   54.060   77.648
 multiply_cannon                   2507 13.6    0.707    0.762   60.595   61.271
 multiply_cannon_loop              2507 14.6    0.472    0.479   56.704   58.093
 ot_mini                            117 10.6    0.001    0.001   47.183   47.240
 dbcsr_complete_redistribute        395 12.7    3.957    4.013   30.863   44.130
 rebuild_ks_matrix                  128  8.3    0.001    0.001   40.969   41.028
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.017   40.968   41.028
 copy_fm_to_dbcsr                   209 11.7    0.001    0.002   27.181   40.424
 cp_fm_cholesky_invert               11 10.9   38.528   38.536   38.528   38.536
 qs_ks_update_qs_env                128  7.6    0.001    0.001   37.585   37.645
 transfer_fm_to_dbcsr                11  9.9    0.030    0.033   24.550   37.524
 mp_alltoall_i22                    716 14.1   22.277   35.570   22.277   35.570
 mp_waitall_1                    103674 16.8   30.978   35.472   30.978   35.472
 qs_ot_get_p                        128 10.4    0.001    0.001   33.789   33.840
 qs_ot_p2m_diag                      83 11.4    0.878    0.884   29.038   29.064
 cp_dbcsr_syevd                      83 12.4    0.006    0.006   27.179   27.183
 qs_ot_get_derivative               117 11.6    0.002    0.002   26.627   26.684
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   23.372   23.373
 cp_fm_diag_elpa_base                83 14.4   18.751   20.581   23.366   23.366
 make_m2s                          5014 13.6    0.077    0.080   22.330   23.313
 qs_rho_update_rho_low              128  7.7    0.001    0.001   22.963   23.001
 calculate_rho_elec                 128  8.7    0.481    0.482   22.962   23.000
 make_images                       5014 14.6    3.767    3.876   21.848   22.829
 sum_up_and_integrate               128 10.3    0.321    0.323   21.543   21.625
 integrate_v_rspace                 128 11.3    0.004    0.004   21.222   21.304
 ot_diis_step                       117 11.6    0.022    0.024   20.512   20.512
 multiply_cannon_metrocomm3       10028 15.6    0.023    0.025   19.267   20.510
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   19.632   19.899
 apply_single                       128 13.6    0.001    0.001   19.632   19.899
 multiply_cannon_multrec          10028 15.6   10.525   12.252   18.257   18.404
 multiply_cannon_sync_h2d         10028 15.6   15.656   15.665   15.656   15.665
 make_images_data                  5014 15.6    0.057    0.062   12.316   14.610
 init_scf_run                        11  5.9    0.000    0.001   14.378   14.378
 scf_env_initial_rho_setup           11  6.9    0.000    0.000   14.378   14.378
 hybrid_alltoall_any               5200 16.5    1.301    3.033   12.123   14.346
 pw_transfer                       1547 11.6    0.093    0.094   13.722   13.727
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.011   13.486   13.492
 cp_fm_cholesky_decompose            22 10.9   11.758   11.816   11.758   11.816
 fft_wrap_pw1pw2_140                523 13.2    3.071    3.112   11.702   11.718
 mp_alltoall_d11v                  2415 14.1   10.445   11.438   10.445   11.438
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   10.571   10.619
 wfi_extrapolate                     11  7.9    0.001    0.001   10.087   10.087
 density_rs2pw                      128  9.7    0.005    0.006    9.780    9.928
 dbcsr_mm_accdrv_process          20762 16.1    3.754    5.724    7.492    9.366
 grid_integrate_task_list           128 12.3    8.601    8.774    8.601    8.774
 fft3d_ps                          1291 14.7    2.853    2.872    8.563    8.585
 qs_energies_init_hamiltonians       11  5.9    0.002    0.002    8.101    8.103
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    7.418    7.477
 calculate_dm_sparse                128  9.5    0.001    0.001    7.046    7.168
 rs_scatter_matrices                139  9.7    3.698    4.701    6.755    6.971
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    6.497    6.542
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=327.116000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2838.636364, yerr=156.657863
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.259295E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255669.
 MP_Allreduce         3139                   6114.
 MP_Sync                 4
 MP_Alltoall            54               38195936.
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.034   86.072   86.073
 qs_energies                          1  2.0    0.000    0.000   85.583   85.589
 ls_scf                               1  3.0    0.000    0.000   84.681   84.687
 dbcsr_multiply_generic             111  6.7    0.016    0.017   73.381   73.536
 multiply_cannon                    111  7.7    0.018    0.020   56.555   57.770
 multiply_cannon_loop               111  8.7    0.208    0.222   53.115   54.566
 ls_scf_main                          1  4.0    0.000    0.000   52.717   52.718
 density_matrix_trs4                  2  5.0    0.002    0.003   47.013   47.104
 ls_scf_init_scf                      1  4.0    0.000    0.001   28.930   28.931
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   27.786   27.834
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   25.660   25.677
 mp_waitall_1                     11031 10.9   22.774   25.643   22.774   25.643
 multiply_cannon_multrec           2664  9.7    8.193    8.866   15.602   17.222
 multiply_cannon_sync_h2d          2664  9.7   13.463   15.951   13.463   15.951
 make_m2s                           222  7.7    0.009    0.011   13.115   13.664
 make_images                        222  8.7    0.101    0.112   13.093   13.645
 multiply_cannon_metrocomm1        2664  9.7    0.009    0.010    9.976   12.678
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.456    9.051
 make_images_data                   222  9.7    0.005    0.006    7.672    8.188
 dbcsr_mm_accdrv_process           4760 10.4    0.509    0.600    7.029    7.994
 hybrid_alltoall_any                227 10.6    0.218    1.846    6.587    7.793
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.320    7.237    6.320    7.237
 calculate_norms                   4752  9.8    5.510    6.178    5.510    6.178
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.076    5.165
 mp_sum_l                           887  5.1    3.179    4.597    3.179    4.597
 multiply_cannon_metrocomm4        2442  9.7    0.012    0.014    2.059    3.691
 mp_irecv_dv                       6231 10.9    2.043    3.667    2.043    3.667
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.369    3.524
 make_images_sizes                  222  9.7    0.000    0.000    0.753    3.424
 mp_alltoall_i44                    222 10.7    0.752    3.424    0.752    3.424
 arnoldi_extremal                     4  6.8    0.000    0.000    3.349    3.374
 arnoldi_normal_ev                    4  7.8    0.001    0.002    3.349    3.374
 build_subspace                      16  8.4    0.009    0.012    3.251    3.254
 ls_scf_post                          1  4.0    0.000    0.000    3.033    3.040
 ls_scf_store_result                  1  5.0    0.000    0.000    2.858    2.894
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.332    2.738
 dbcsr_merge_single_wm              555 10.7    0.461    0.586    2.323    2.729
 make_images_pack                   222  9.7    2.208    2.634    2.210    2.635
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.013    2.366    2.593
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.412    2.472
 dbcsr_sort_data                    658 11.4    2.120    2.456    2.120    2.456
 dbcsr_matrix_vector_mult_local     304 10.0    2.050    2.443    2.052    2.445
 buffer_matrices_ensure_size        222  8.7    1.762    2.133    1.762    2.133
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.814    1.815
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.805    1.806
 qs_ks_build_kohn_sham_matrix         3  8.3    0.002    0.004    1.805    1.806
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=86.073000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1131.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.094031E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3138                  10075.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.047    0.062   92.581   92.582
 qs_energies                          1  2.0    0.000    0.000   92.041   92.059
 ls_scf                               1  3.0    0.000    0.001   90.713   90.731
 dbcsr_multiply_generic             111  6.7    0.016    0.017   76.287   76.672
 multiply_cannon                    111  7.7    0.029    0.041   53.664   57.860
 ls_scf_main                          1  4.0    0.000    0.002   56.037   56.042
 multiply_cannon_loop               111  8.7    0.116    0.125   50.332   53.644
 density_matrix_trs4                  2  5.0    0.002    0.005   50.173   50.400
 ls_scf_init_scf                      1  4.0    0.001    0.003   31.121   31.122
 mp_waitall_1                      9105 10.9   21.672   30.555   21.672   30.555
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   29.880   29.975
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.461   27.473
 multiply_cannon_multrec           1332  9.7   13.187   16.269   22.550   26.829
 multiply_cannon_metrocomm3        1332  9.7    0.007    0.008   11.918   21.138
 make_m2s                           222  7.7    0.008    0.009   15.764   16.482
 make_images                        222  8.7    1.589    1.975   15.733   16.453
 dbcsr_mm_accdrv_process           4041 10.4    0.285    0.456    8.961   10.596
 make_images_data                   222  9.7    0.004    0.005    9.124   10.147
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.553   10.141    8.553   10.141
 hybrid_alltoall_any                227 10.6    0.523    2.485    8.493    9.737
 mp_sum_l                           887  5.1    5.560    8.721    5.560    8.721
 multiply_cannon_metrocomm4        1221  9.7    0.006    0.008    3.249    7.929
 mp_irecv_dv                       3311 11.0    3.230    7.873    3.230    7.873
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.292    6.977
 calculate_norms                   2376  9.8    6.004    6.818    6.004    6.818
 multiply_cannon_sync_h2d          1332  9.7    4.776    6.152    4.776    6.152
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.095    5.290
 arnoldi_extremal                     4  6.8    0.000    0.000    4.942    4.956
 arnoldi_normal_ev                    4  7.8    0.001    0.005    4.942    4.956
 build_subspace                      16  8.4    0.014    0.021    4.690    4.693
 ls_scf_post                          1  4.0    0.002    0.016    3.554    3.571
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.264    3.527
 ls_scf_store_result                  1  5.0    0.000    0.000    3.237    3.365
 dbcsr_matrix_vector_mult_local     304 10.0    2.750    3.221    2.752    3.222
 ls_scf_dm_to_ks                      2  5.0    0.000    0.001    2.678    2.764
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.254    2.574
 mp_allgather_i34                   111  8.7    1.015    2.506    1.015    2.506
 make_images_pack                   222  9.7    2.026    2.402    2.029    2.404
 dbcsr_sort_data                    436 11.2    1.829    2.055    1.829    2.055
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.923    1.925
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.909    1.912
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    1.909    1.912
 make_images_sizes                  222  9.7    0.000    0.000    0.663    1.908
 mp_alltoall_i44                    222 10.7    0.663    1.907    0.663    1.907
 dbcsr_data_new                    4174 10.1    1.619    1.860    1.619    1.860
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=92.582000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1769.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.715693E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265470.
 MP_Allreduce         3138                  10896.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.031    0.041   94.460   94.460
 qs_energies                          1  2.0    0.000    0.000   93.858   93.864
 ls_scf                               1  3.0    0.000    0.000   92.458   92.467
 dbcsr_multiply_generic             111  6.7    0.016    0.017   76.969   77.239
 ls_scf_main                          1  4.0    0.000    0.000   58.009   58.014
 multiply_cannon                    111  7.7    0.043    0.103   53.362   57.374
 multiply_cannon_loop               111  8.7    0.101    0.119   49.757   53.090
 density_matrix_trs4                  2  5.0    0.002    0.003   51.973   52.132
 mp_waitall_1                      7281 11.0   24.085   33.061   24.085   33.061
 ls_scf_init_scf                      1  4.0    0.000    0.001   30.890   30.893
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.680   29.750
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.287   27.301
 multiply_cannon_multrec            888  9.7   12.684   15.170   21.327   24.417
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   11.389   22.044
 make_m2s                           222  7.7    0.007    0.008   17.061   18.276
 make_images                        222  8.7    1.984    2.323   17.022   18.238
 make_images_data                   222  9.7    0.004    0.005    9.776   10.843
 hybrid_alltoall_any                227 10.6    0.623    2.881    9.458   10.683
 dbcsr_mm_accdrv_process           3754 10.4    0.244    0.422    8.187    9.445
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.812    9.023    7.812    9.023
 mp_sum_l                           887  5.1    4.991    8.826    4.991    8.826
 multiply_cannon_sync_h2d           888  9.7    6.098    7.369    6.098    7.369
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.857    6.895
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.440    6.748
 mp_irecv_dv                       2335 11.1    2.425    6.694    2.425    6.694
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.550    6.097
 arnoldi_extremal                     4  6.8    0.000    0.000    5.203    5.222
 arnoldi_normal_ev                    4  7.8    0.001    0.004    5.203    5.222
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.958    5.139
 build_subspace                      16  8.4    0.014    0.020    4.902    4.909
 calculate_norms                   1584  9.8    4.325    4.641    4.325    4.641
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.451    3.792
 mp_allgather_i34                   111  8.7    1.429    3.766    1.429    3.766
 dbcsr_matrix_vector_mult_local     304 10.0    3.010    3.573    3.012    3.575
 ls_scf_post                          1  4.0    0.000    0.000    3.558    3.566
 ls_scf_store_result                  1  5.0    0.000    0.000    3.285    3.380
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.887    3.012
 make_images_sizes                  222  9.7    0.000    0.000    0.944    2.128
 mp_alltoall_i44                    222 10.7    0.944    2.127    0.944    2.127
 make_images_pack                   222  9.7    1.809    2.091    1.812    2.094
 dbcsr_sort_data                    325 11.1    1.881    2.082    1.881    2.082
 dbcsr_data_release                9322 10.9    1.299    2.037    1.299    2.037
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.921    1.923
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.903    1.905
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    1.903    1.905
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=94.460000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2165.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.311141E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3138                  13030.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.035    0.047   97.859   97.860
 qs_energies                          1  2.0    0.000    0.000   97.226   97.235
 ls_scf                               1  3.0    0.000    0.000   95.577   95.587
 dbcsr_multiply_generic             111  6.7    0.017    0.018   78.697   78.937
 ls_scf_main                          1  4.0    0.000    0.000   59.285   59.286
 multiply_cannon                    111  7.7    0.067    0.179   51.732   56.416
 density_matrix_trs4                  2  5.0    0.002    0.003   52.997   53.130
 multiply_cannon_loop               111  8.7    0.115    0.125   46.654   49.346
 ls_scf_init_scf                      1  4.0    0.001    0.005   33.040   33.042
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   31.770   31.845
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   29.309   29.323
 mp_waitall_1                      6369 11.0   22.356   28.946   22.356   28.946
 multiply_cannon_multrec           1332  9.7   14.319   17.073   22.322   25.430
 make_m2s                           222  7.7    0.008    0.009   20.992   22.463
 make_images                        222  8.7    3.150    3.615   20.940   22.409
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.004    8.830   16.471
 make_images_data                   222  9.7    0.004    0.005   11.633   13.224
 hybrid_alltoall_any                227 10.6    0.803    3.752   10.899   12.681
 dbcsr_mm_accdrv_process           3641 10.4    0.214    0.408    7.641    9.192
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.266    8.768    7.266    8.768
 mp_sum_l                           887  5.1    4.383    8.409    4.383    8.409
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.390    6.685
 multiply_cannon_sync_h2d          1332  9.7    5.524    6.208    5.524    6.208
 multiply_cannon_metrocomm4        1110  9.7    0.004    0.006    2.111    5.998
 mp_irecv_dv                       3229 10.9    2.089    5.913    2.089    5.913
 arnoldi_extremal                     4  6.8    0.000    0.000    5.640    5.662
 arnoldi_normal_ev                    4  7.8    0.001    0.004    5.640    5.662
 build_subspace                      16  8.4    0.014    0.021    5.279    5.286
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.725    5.272
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.597    4.753
 mp_allgather_i34                   111  8.7    2.171    4.638    2.171    4.638
 calculate_norms                   2376  9.8    4.193    4.527    4.193    4.527
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.762    4.024
 dbcsr_matrix_vector_mult_local     304 10.0    3.182    3.651    3.184    3.653
 dbcsr_sort_data                    658 11.4    3.048    3.479    3.048    3.479
 ls_scf_post                          1  4.0    0.000    0.000    3.252    3.259
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.788    3.234
 dbcsr_merge_single_wm              555 10.7    0.532    0.661    2.779    3.225
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.120    3.168
 ls_scf_store_result                  1  5.0    0.000    0.000    2.973    3.036
 dbcsr_data_release               10477 10.7    1.597    2.430    1.597    2.430
 dbcsr_finalize                     304  7.8    0.050    0.061    1.817    2.005
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.986    1.987
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.963    1.965
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.002    1.963    1.965
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=97.860000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2717.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.680999E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265558.
 MP_Allreduce         3129                  15263.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.049    0.074   99.824   99.825
 qs_energies                          1  2.0    0.000    0.000   98.999   99.005
 ls_scf                               1  3.0    0.000    0.000   97.070   97.080
 dbcsr_multiply_generic             111  6.7    0.018    0.018   77.380   77.634
 ls_scf_main                          1  4.0    0.000    0.000   62.291   62.292
 multiply_cannon                    111  7.7    0.081    0.133   54.714   59.983
 density_matrix_trs4                  2  5.0    0.002    0.003   54.914   55.002
 multiply_cannon_loop               111  8.7    0.069    0.076   50.112   51.862
 mp_waitall_1                      5436 11.0   25.543   31.291   25.543   31.291
 ls_scf_init_scf                      1  4.0    0.001    0.001   31.203   31.208
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.844   29.884
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.646   27.658
 multiply_cannon_multrec            444  9.7   14.125   16.465   21.286   23.553
 make_m2s                           222  7.7    0.006    0.007   17.889   20.548
 make_images                        222  8.7    3.727    4.429   17.826   20.485
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   10.538   14.965
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001    5.860   13.744
 make_images_data                   222  9.7    0.003    0.004    9.936   12.707
 hybrid_alltoall_any                227 10.6    0.791    3.776    9.632   12.300
 dbcsr_mm_accdrv_process           3003 10.4    0.177    0.346    6.852    7.990
 multiply_cannon_sync_h2d           444  9.7    6.532    7.989    6.532    7.989
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.534    7.640    6.534    7.640
 mp_allgather_i34                   111  8.7    2.811    7.129    2.811    7.129
 arnoldi_extremal                     4  6.8    0.000    0.000    6.590    6.604
 arnoldi_normal_ev                    4  7.8    0.001    0.004    6.590    6.604
 build_subspace                      16  8.4    0.015    0.019    6.185    6.197
 mp_sum_l                           887  5.1    2.839    5.169    2.839    5.169
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.653    4.791
 dbcsr_matrix_vector_mult           304  9.0    0.011    0.021    4.587    4.680
 dbcsr_matrix_vector_mult_local     304 10.0    3.620    4.044    3.622    4.046
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.689    3.978
 mp_irecv_dv                       1241 11.2    1.671    3.950    1.671    3.950
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    1.917    3.856
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.702    3.810
 calculate_norms                    792  9.8    3.558    3.691    3.558    3.691
 make_images_sizes                  222  9.7    0.000    0.000    1.281    3.590
 mp_alltoall_i44                    222 10.7    1.281    3.590    1.281    3.590
 ls_scf_post                          1  4.0    0.000    0.000    3.576    3.581
 ls_scf_store_result                  1  5.0    0.000    0.000    3.345    3.404
 dbcsr_finalize                     304  7.8    0.062    0.078    2.202    2.270
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.163    2.164
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.131    2.132
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.002    2.131    2.132
 dbcsr_merge_all                    275  8.9    0.475    0.527    2.050    2.107
 dbcsr_data_release               10123 10.8    1.355    2.010    1.355    2.010
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=99.825000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3678.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.713413E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284111.
 MP_Allreduce         3123                  21388.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.104    0.151  110.710  110.711
 qs_energies                          1  2.0    0.000    0.000  109.140  109.152
 ls_scf                               1  3.0    0.000    0.000  106.175  106.187
 dbcsr_multiply_generic             111  6.7    0.023    0.028   79.082   79.201
 ls_scf_main                          1  4.0    0.000    0.000   66.948   66.949
 density_matrix_trs4                  2  5.0    0.002    0.003   57.441   57.498
 multiply_cannon                    111  7.7    0.174    0.244   51.003   52.956
 multiply_cannon_loop               111  8.7    0.068    0.070   47.467   48.470
 ls_scf_init_scf                      1  4.0    0.001    0.001   35.486   35.486
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   33.793   33.813
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   30.280   30.292
 mp_waitall_1                      4527 11.1   23.215   27.607   23.215   27.607
 make_m2s                           222  7.7    0.007    0.007   24.386   25.414
 make_images                        222  8.7    4.603    5.015   24.276   25.302
 multiply_cannon_multrec            444  9.7   17.919   18.562   22.864   23.406
 hybrid_alltoall_any                227 10.6    1.667    3.631   13.244   16.096
 make_images_data                   222  9.7    0.003    0.004   13.478   16.093
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   11.026   11.863
 multiply_cannon_sync_h2d           444  9.7    8.846    8.892    8.846    8.892
 arnoldi_extremal                     4  6.8    0.000    0.000    7.454    7.464
 arnoldi_normal_ev                    4  7.8    0.002    0.008    7.454    7.464
 build_subspace                      16  8.4    0.026    0.036    6.888    6.898
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.736    6.004
 dbcsr_matrix_vector_mult           304  9.0    0.017    0.034    5.373    5.512
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    5.395    5.488
 dbcsr_mm_accdrv_process           1814 10.4    0.272    0.974    4.774    5.323
 dbcsr_matrix_vector_mult_local     304 10.0    4.878    5.175    4.880    5.177
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.210    4.350    4.210    4.350
 make_images_sizes                  222  9.7    0.000    0.000    1.544    3.858
 mp_alltoall_i44                    222 10.7    1.544    3.858    1.544    3.858
 ls_scf_post                          1  4.0    0.000    0.000    3.741    3.752
 mp_allgather_i34                   111  8.7    1.155    3.640    1.155    3.640
 ls_scf_store_result                  1  5.0    0.000    0.000    3.430    3.437
 calculate_norms                    792  9.8    3.241    3.279    3.241    3.279
 dbcsr_finalize                     304  7.8    0.082    0.090    3.105    3.192
 dbcsr_merge_all                    275  8.9    0.891    0.914    2.889    2.970
 dbcsr_complete_redistribute          5  7.6    1.430    1.470    2.854    2.953
 qs_energies_init_hamiltonians        1  3.0    0.001    0.002    2.935    2.935
 dbcsr_data_release               12724 10.6    2.335    2.892    2.335    2.892
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.499    2.613
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.589    2.591
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.522    2.524
 qs_ks_build_kohn_sham_matrix         3  8.3    0.003    0.005    2.522    2.524
 dbcsr_sort_data                    325 11.1    2.450    2.522    2.450    2.522
 dbcsr_new_transposed                 4  7.5    0.243    0.252    2.395    2.415
 mp_alltoall_d11v                    48  9.2    2.261    2.309    2.261    2.309
 dbcsr_frobenius_norm                74  6.6    2.059    2.134    2.198    2.243
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=110.711000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=6850.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/186428f4e848f883d21da2d2b78d036968ebedff_performance_tests/27/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32        7009386627072       0.0%      0.0%    100.0%
 flops     9 x     9 x    32        7335108845568       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        9866241589248       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        9884108906496       0.0%      0.0%    100.0%
 flops    22 x    22 x    32       13354440523776       0.0%      0.0%    100.0%
 flops    32 x    32 x     9       20607185977344       0.0%      0.0%    100.0%
 flops    32 x    32 x    22       25186560638976       0.0%      0.0%    100.0%
 flops     9 x    32 x    32       28458319085568       0.0%      0.0%    100.0%
 flops    22 x    32 x    32       34782389993472       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       42881542373376       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       55680402235392       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       55680402235392       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       72328573419520       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       383.054662E+12       0.0%      0.0%    100.0%
 flops max/rank                    733.641090E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                        26899403712       0.0%      0.0%    100.0%
 number of processed stacks             118860288       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     226.3
 marketing flops                   780.439111E+12
 -------------------------------------------------------------------------------
 # multiplications                           1445
 max memory usage/rank             590.143488E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged               102097920
 MPI messages size (bytes):
  total size                        37.227590E+12
  min size                           0.000000E+00
  max size                           4.551360E+06
  average size                     364.626312E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              731472                        0
       128 < size <=     8192            11922720              97670922240
      8192 < size <=    32768            24718992             614677610496
     32768 < size <=   131072            20000256            1970081366016
    131072 < size <=  4194304            42515668           24886801223040
   4194304 < size <= 16777216             2208812            9656099886720
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4640                  76559.
 MP_Allreduce        13232                   2081.
 MP_Sync              1064
 MP_Alltoall          2588              812824902.
 MP_SendRecv        168740                  11136.
 MP_ISendRecv        92040                  11136.
 MP_Wait            102830
 MP_comm_split          40
 MP_ISend            26090                  85106.
 MP_IRecv            37890                  59644.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.044    0.114  339.483  339.484
 qs_mol_dyn_low                       1  2.0    0.006    0.080  338.215  338.241
 qs_forces                            5  3.8    0.006    0.040  337.990  337.993
 qs_energies                          5  4.8    0.005    0.053  334.264  334.289
 scf_env_do_scf                       5  5.8    0.000    0.002  315.698  315.701
 scf_env_do_scf_inner_loop          105  6.6    0.004    0.038  272.325  272.355
 qs_scf_new_mos                     105  7.6    0.000    0.001  214.701  215.083
 qs_scf_loop_do_ot                  105  8.6    0.001    0.001  214.701  215.083
 ot_scf_mini                        105  9.6    0.003    0.005  204.339  204.662
 dbcsr_multiply_generic            1445 12.2    0.124    0.131  142.691  144.447
 velocity_verlet                      4  3.0    0.002    0.028  130.816  130.818
 multiply_cannon                   1445 13.2    0.280    0.297  119.113  123.292
 multiply_cannon_loop              1445 14.2    2.400    2.521  115.868  120.230
 qs_ot_get_p                        112 10.4    0.001    0.002  107.962  108.274
 qs_ot_p2m_diag                      40 11.0    0.020    0.031   94.713   94.814
 cp_dbcsr_syevd                      40 12.0    0.003    0.039   91.078   91.082
 cp_fm_syevd                         40 13.0    0.000    0.004   84.219   84.393
 cp_fm_redistribute_end              40 14.0   39.033   77.871   39.045   77.880
 cp_fm_syevd_base                    40 14.0   38.803   77.649   38.803   77.649
 ot_mini                            105 10.6    0.001    0.003   64.840   64.969
 mp_waitall_1                    488190 16.1   42.992   54.945   42.992   54.945
 multiply_cannon_multrec          69360 15.2   30.773   36.893   38.691   44.406
 init_scf_loop                        7  6.6    0.000    0.007   43.305   43.309
 qs_ot_get_derivative                55 11.6    0.001    0.001   42.868   43.052
 multiply_cannon_metrocomm3       69360 15.2    0.194    0.203   27.932   42.875
 prepare_preconditioner               7  7.6    0.000    0.000   37.102   37.139
 make_preconditioner                  7  8.6    0.000    0.003   37.102   37.139
 rebuild_ks_matrix                  110  8.4    0.000    0.000   36.172   36.379
 qs_ks_build_kohn_sham_matrix       110  9.4    0.012    0.024   36.171   36.379
 multiply_cannon_sync_h2d         69360 15.2   29.465   34.044   29.465   34.044
 qs_ks_update_qs_env                112  7.6    0.001    0.001   33.249   33.442
 qs_rho_update_rho_low              110  7.6    0.001    0.005   29.253   29.546
 calculate_rho_elec                 110  8.6    0.030    0.034   29.252   29.544
 make_full_inverse_cholesky           7  9.6    0.000    0.006   27.558   27.626
 rs_pw_transfer                     690 11.5    0.011    0.020   25.808   27.122
 density_rs2pw                      110  9.6    0.006    0.010   22.182   23.528
 apply_preconditioner_dbcsr          62 12.6    0.000    0.001   23.227   23.471
 apply_single                        62 13.6    0.000    0.000   23.227   23.471
 ot_new_cg_direction                 55 11.6    0.001    0.003   21.205   21.207
 cp_fm_cholesky_invert                7 10.6   18.534   18.556   18.534   18.556
 qs_ot_get_orbitals                 105 10.6    0.001    0.001   16.016   16.379
 qs_ot_get_derivative_taylor         37 12.8    0.001    0.001   15.366   15.506
 init_scf_run                         5  5.8    0.000    0.001   15.434   15.436
 scf_env_initial_rho_setup            5  6.8    0.000    0.005   15.434   15.435
 pw_transfer                       1645 12.4    0.084    0.110   14.513   14.909
 fft_wrap_pw1pw2                   1425 13.5    0.013    0.017   14.369   14.772
 mp_sum_l                          4764 12.2   13.391   14.577   13.391   14.577
 sum_up_and_integrate                60 10.3    0.028    0.030   14.416   14.434
 integrate_v_rspace                  60 11.3    0.002    0.002   14.388   14.407
 multiply_cannon_metrocomm1       69360 15.2    0.096    0.104    5.624   13.102
 calculate_dm_sparse                110  9.5    0.000    0.001   12.041   12.295
 check_diag                          80 13.5    8.588    8.861   12.030   12.188
 fft_wrap_pw1pw2_240                915 15.0    1.193    1.291   11.720   12.098
 qs_ot_get_derivative_diag           18 12.0    0.000    0.001   11.672   11.757
 qs_vxc_create                      110 10.4    0.004    0.046   11.418   11.481
 rs_pw_transfer_RS2PW_30            110 11.6    1.584    1.660   10.670   11.480
 mp_sendrecv_dv                  168740 12.6   11.028   11.176   11.028   11.176
 make_m2s                          2890 13.2    0.083    0.090    9.757   10.452
 make_images                       2890 14.2    0.244    0.265    9.647   10.343
 fft3d_pb                           915 16.0    2.381    2.692    9.769   10.255
 potential_pw2rs                     60 12.3    0.003    0.003    9.830    9.903
 cp_dbcsr_sm_fm_multiply             15  9.3    0.001    0.003    9.790    9.808
 acc_transpose_blocks             69360 15.2    0.359    0.384    8.850    9.394
 calculate_first_density_matrix       1  7.0    0.000    0.006    9.309    9.331
 cp_dbcsr_sm_fm_multiply_core        15 10.3    0.000    0.000    9.142    9.218
 make_full_single_inverse             7  9.6    0.001    0.005    9.039    9.076
 dbcsr_mm_accdrv_process         154766 15.8    4.137    4.297    7.793    8.624
 cp_fm_cholesky_decompose            14 10.2    8.383    8.396    8.383    8.396
 xc_rho_set_and_dset_create         110 12.4    0.076    0.096    7.856    8.113
 xc_vxc_pw_create                    60 11.3    0.039    0.049    7.738    7.801
 mp_alltoall_z22v                  2340 17.7    7.027    7.605    7.027    7.605
 acc_transpose_blocks_kernels     69360 16.2    0.882    0.924    6.913    7.374
 xc_pw_derive                       510 13.4    0.006    0.007    7.038    7.114
 mp_alltoall_d11v                  1300 13.8    6.121    6.976    6.121    6.976
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="601", plot="h2o_512_md", label="(64n/12r/1t)", y=339.484000, yerr=0.000000
PlotPoint: name="602", plot="h2o_512_md_mem", label="(64n/12r/1t)", y=559.800000, yerr=2.993326
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: 186428f4e848f883d21da2d2b78d036968ebedff
Summary: empty
Status: OK