=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: 7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1,
#              Cray-FFTW 3.3.8.10, COSMA 2.6.6, ELPA 2023.05.001,
#              HDF5 1.12.0, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.2.2, LIBVORI 220621, LIBXSMM 1.17,
#              PLUMED 2.9.0, SIRIUS 7.4.3, SPGLIB 1.16.2
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed as default.
#        Replace or adapt the "module add" commands below if needed.
#
# Last update: 13.09.2023
#
# Dual-use preamble (bash + make):
#  - bash: the line "# \" below is an ordinary comment, so the commands that
#    follow it are executed when this file is sourced.
#  - make: every line below ends in a backslash, which continues the "# \"
#    comment line, so make skips the whole preamble up to "return".
#    Keep the trailing backslashes intact when editing.
# Steps performed when sourced:
#  1. Print an error plus usage hint and exit if the file is executed
#     instead of sourced.
#  2. Take the GCC version from the first argument (default: 9.3.0).
#  3. Swap PrgEnv-cray for PrgEnv-gnu, load gcc, cray-fftw and cudatoolkit,
#     print the expected module set, list the loaded modules and save them
#     as the module collection "cp2k_gpu_gnu_psmp".
#  4. Run install_cp2k_toolchain.sh in tools/toolchain and source
#     tools/toolchain/install/setup afterwards.
#  5. Print the suggested make command lines and return to the calling shell.
# NOTE(review): ${maxtasks} (passed to the installer as -j${maxtasks}) is not
# set anywhere in this file; presumably it is expected from the caller's
# environment -- confirm.
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   cd tools/toolchain; \
   ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j${maxtasks} --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed; \
   cd ../..; \
   printf "Sourcing ${PWD}/tools/toolchain/install/setup ... "; \
   source ${PWD}/tools/toolchain/install/setup; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   echo; \
   return

# Build configuration
# Feature switches: the conditionals in this file compare them against "yes".
DO_CHECKS := no
USE_ACC := yes

# Versions of the optional libraries. The sections further down test each
# USE_* value for being non-empty, so an unset entry leaves that library out.
USE_COSMA := 2.6.6
USE_ELPA := 2023.05.001
USE_HDF5 := 1.12.0
USE_LIBINT := 2.6.0
USE_LIBPEXSI := 1.2.0
USE_LIBVORI := 220621
USE_LIBXC := 6.2.2
USE_LIBXSMM := 1.17
USE_PLUMED := 2.9.0
# QUIP stays disabled unless the next line is uncommented
#USE_QUIP := 0.9.10
USE_SIRIUS := 7.4.3
USE_SPGLIB := 1.16.2

# Companion versions, only needed for SIRIUS
LIBVDWXC_VER := 0.4.0
SPFFT_VER := 1.0.6
SPLA_VER := 1.5.5

# Companion versions, only needed for LIBPEXSI
SCOTCH_VER := 6.0.0
SUPERLU_VER := 6.1.0

# LMAX is part of the LIBINT install directory name,
# MAX_CONTR is passed on as -D__MAX_CONTR
LMAX := 5
MAX_CONTR := 4

# Offload settings
GPUVER := P100
OFFLOAD_TARGET := cuda

# Compiler drivers, offload compiler, linker and archiver
FC := ftn
LD := ftn
CC := cc
CXX := CC
OFFLOAD_CC := nvcc
AR := ar -r

# No -march flag here: cc, CC, and ftn include already the proper one
CFLAGS := -O2 -fopenmp -fopenmp-simd \
          -ftree-vectorize -funroll-loops -g

# Root of the toolchain installation, resolved against the PWD variable.
# NOTE(review): PWD (not make's CURDIR) is used here -- presumably on purpose;
# confirm before changing.
INSTALL_PATH := $(PWD)/tools/toolchain/install

# Preprocessor defines that are always set; MAX_CONTR is stripped of
# surrounding whitespace before it is embedded.
DFLAGS := -D__parallel -D__SCALAPACK -D__FFTW3 \
          -D__MAX_CONTR=$(strip $(MAX_CONTR))

# DO_CHECKS := yes adds the -D__CHECK_DIAG define
ifeq (yes,$(DO_CHECKS))
  DFLAGS += -D__CHECK_DIAG
endif

# USE_ACC := yes adds the DBCSR accelerator and CUDA offload defines.
# -D__NO_OFFLOAD_PW is set as well: possibly no performance gain with
# PW_CUDA currently.
ifeq (yes,$(USE_ACC))
  DFLAGS += -D__DBCSR_ACC \
            -D__OFFLOAD_CUDA \
            -D__NO_OFFLOAD_PW
endif

# Optional PLUMED support: sets -D__PLUMED2, links the static libplumed.a from
# the toolchain install tree and sets USE_GSL to 2.7, which activates the
# USE_GSL section of this file.
ifneq ($(USE_PLUMED),)
  DFLAGS += -D__PLUMED2
  USE_GSL := 2.7
  USE_PLUMED := $(strip $(USE_PLUMED))
  PLUMED_LIB := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
  LIBS += $(PLUMED_LIB)/libplumed.a
endif

# Optional ELPA support: adds the ELPA include paths, -D__ELPA and the static
# libelpa.a from the toolchain install tree.
# The install tree is laid out as elpa-<version>/<target>/..., so the target is
# chosen from USE_ACC: the GPU build (plus -D__ELPA_NVIDIA_GPU) only when
# offloading is enabled, the CPU build otherwise. This keeps the block in line
# with the SIRIUS and COSMA sections, which also switch on USE_ACC, and makes
# the TARGET check below a real decision instead of an always-true test.
# NOTE(review): assumes the toolchain installed a "cpu" flavour next to the
# "nvidia" one -- confirm before building with USE_ACC := no.
ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
   ifeq ($(USE_ACC), yes)
      TARGET         := nvidia
   else
      TARGET         := cpu
   endif
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   DFLAGS         += -D__ELPA
   ifeq ($(TARGET), nvidia)
      DFLAGS         += -D__ELPA_NVIDIA_GPU
   endif
   LIBS           += $(ELPA_LIB)/libelpa.a
endif

# Optional QUIP support: include path, -D__QUIP and the static QUIP/FoX
# libraries from the toolchain install tree (link order kept as is).
ifneq ($(USE_QUIP),)
  DFLAGS += -D__QUIP
  USE_QUIP := $(strip $(USE_QUIP))
  QUIP_LIB := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
  QUIP_INC := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
  CFLAGS += -I$(QUIP_INC)
  LIBS += $(QUIP_LIB)/libquip_core.a \
          $(QUIP_LIB)/libatoms.a \
          $(QUIP_LIB)/libFoX_sax.a \
          $(QUIP_LIB)/libFoX_common.a \
          $(QUIP_LIB)/libFoX_utils.a \
          $(QUIP_LIB)/libFoX_fsys.a
endif

# Optional LIBPEXSI support together with its SuperLU_DIST and SCOTCH
# companions: include paths, -D__LIBPEXSI and the static libraries from the
# toolchain install tree (link order kept as is).
ifneq ($(USE_LIBPEXSI),)
  DFLAGS += -D__LIBPEXSI
  # Normalise the version strings before they are used in directory names
  USE_LIBPEXSI := $(strip $(USE_LIBPEXSI))
  SUPERLU_VER := $(strip $(SUPERLU_VER))
  SCOTCH_VER := $(strip $(SCOTCH_VER))
  LIBPEXSI_LIB := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
  LIBPEXSI_INC := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
  SUPERLU_LIB := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib
  SUPERLU_INC := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
  SCOTCH_LIB := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib
  SCOTCH_INC := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
  CFLAGS += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC)
  LIBS += $(LIBPEXSI_LIB)/libpexsi.a \
          $(SUPERLU_LIB)/libsuperlu_dist.a \
          $(SCOTCH_LIB)/libptscotchparmetis.a \
          $(SCOTCH_LIB)/libptscotch.a \
          $(SCOTCH_LIB)/libptscotcherr.a \
          $(SCOTCH_LIB)/libscotchmetis.a \
          $(SCOTCH_LIB)/libscotch.a
endif

# Optional LIBVORI support: -D__LIBVORI plus the static libvori.a from the
# toolchain install tree.
ifneq ($(USE_LIBVORI),)
  DFLAGS += -D__LIBVORI
  USE_LIBVORI := $(strip $(USE_LIBVORI))
  LIBVORI_LIB := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
  LIBS += $(LIBVORI_LIB)/libvori.a
endif

# Optional LIBXC support: include path, -D__LIBXC and the static libraries
# libxcf03.a and libxc.a (in this order) from the toolchain install tree.
ifneq ($(USE_LIBXC),)
  DFLAGS += -D__LIBXC
  USE_LIBXC := $(strip $(USE_LIBXC))
  LIBXC_LIB := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
  LIBXC_INC := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
  CFLAGS += -I$(LIBXC_INC)
  LIBS += $(LIBXC_LIB)/libxcf03.a \
          $(LIBXC_LIB)/libxc.a
endif

# Optional LIBINT support: include path, -D__LIBINT and the static libint2.a.
# The install directory name carries both the LIBINT version and LMAX.
ifneq ($(USE_LIBINT),)
  DFLAGS += -D__LIBINT
  LMAX := $(strip $(LMAX))
  USE_LIBINT := $(strip $(USE_LIBINT))
  LIBINT_LIB := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
  LIBINT_INC := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
  CFLAGS += -I$(LIBINT_INC)
  LIBS += $(LIBINT_LIB)/libint2.a
endif

# Optional SPGLIB support: include path, -D__SPGLIB and the static
# libsymspg.a from the toolchain install tree.
ifneq ($(USE_SPGLIB),)
  DFLAGS += -D__SPGLIB
  USE_SPGLIB := $(strip $(USE_SPGLIB))
  SPGLIB_LIB := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
  SPGLIB_INC := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
  CFLAGS += -I$(SPGLIB_INC)
  LIBS += $(SPGLIB_LIB)/libsymspg.a
endif

# Optional LIBXSMM support: include path, -D__LIBXSMM and the static
# libraries libxsmmf.a and libxsmm.a (in this order).
ifneq ($(USE_LIBXSMM),)
  DFLAGS += -D__LIBXSMM
  USE_LIBXSMM := $(strip $(USE_LIBXSMM))
  LIBXSMM_LIB := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
  LIBXSMM_INC := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
  CFLAGS += -I$(LIBXSMM_INC)
  LIBS += $(LIBXSMM_LIB)/libxsmmf.a \
          $(LIBXSMM_LIB)/libxsmm.a
endif

# Optional SIRIUS support together with libvdwxc, SpFFT and SpLA.
# With USE_ACC = yes the cuda sub-directories of SpFFT, SpLA and SIRIUS are
# used and -D__OFFLOAD_GEMM is defined; otherwise the plain directories are
# taken. Include paths, defines and static libraries are then appended.
ifneq ($(USE_SIRIUS),)
  # Normalise all version strings first
  USE_SIRIUS := $(strip $(USE_SIRIUS))
  LIBVDWXC_VER := $(strip $(LIBVDWXC_VER))
  SPFFT_VER := $(strip $(SPFFT_VER))
  SPLA_VER := $(strip $(SPLA_VER))
  # Locations that do not depend on the offload setting
  LIBVDWXC_INC := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
  LIBVDWXC_LIB := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
  SPFFT_INC := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
  SPLA_INC := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla
  # Locations that do depend on the offload setting
  ifneq ($(USE_ACC), yes)
    SPFFT_LIB := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
    SPLA_LIB := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
    SIRIUS_INC := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
    SIRIUS_LIB := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
  else
    DFLAGS += -D__OFFLOAD_GEMM
    SPFFT_LIB := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib/cuda
    SPLA_LIB := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib/cuda
    SIRIUS_INC := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/cuda
    SIRIUS_LIB := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib/cuda
  endif
  CFLAGS += -I$(LIBVDWXC_INC) \
            -I$(SPFFT_INC) \
            -I$(SPLA_INC) \
            -I$(SIRIUS_INC)
  DFLAGS += -D__LIBVDWXC \
            -D__SPFFT \
            -D__SPLA \
            -D__SIRIUS
  LIBS += $(SIRIUS_LIB)/libsirius.a \
          $(SPLA_LIB)/libspla.a \
          $(SPFFT_LIB)/libspfft.a \
          $(LIBVDWXC_LIB)/libvdwxc.a
endif

# Optional HDF5 support: include path, -D__HDF5 and the static libraries
# libhdf5_fortran.a, libhdf5_hl.a and libhdf5.a (in this order).
ifneq ($(USE_HDF5),)
  DFLAGS += -D__HDF5
  USE_HDF5 := $(strip $(USE_HDF5))
  HDF5_LIB := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/lib
  HDF5_INC := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/include
  CFLAGS += -I$(HDF5_INC)
  LIBS += $(HDF5_LIB)/libhdf5_fortran.a \
          $(HDF5_LIB)/libhdf5_hl.a \
          $(HDF5_LIB)/libhdf5.a
endif

# Optional COSMA support: include path, -D__COSMA and the static COSMA
# libraries. With USE_ACC = yes the version string gets the suffix "-cuda",
# which selects the COSMA-<version>-cuda install directory.
ifneq ($(USE_COSMA),)
  DFLAGS += -D__COSMA
  USE_COSMA := $(strip $(USE_COSMA))
  ifeq (yes,$(USE_ACC))
    USE_COSMA := $(USE_COSMA)-cuda
  endif
  COSMA_LIB := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
  COSMA_INC := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
  CFLAGS += -I$(COSMA_INC)
  LIBS += $(COSMA_LIB)/libcosma_prefixed_pxgemm.a \
          $(COSMA_LIB)/libcosma.a \
          $(COSMA_LIB)/libcosta.a \
          $(COSMA_LIB)/libTiled-MM.a
endif

# Optional GSL support (USE_GSL is set by the PLUMED section of this file):
# include path, -D__GSL and the static libgsl.a.
ifneq ($(USE_GSL),)
  DFLAGS += -D__GSL
  USE_GSL := $(strip $(USE_GSL))
  GSL_LIB := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
  GSL_INC := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
  CFLAGS += -I$(GSL_INC)
  LIBS += $(GSL_LIB)/libgsl.a
endif

# Offload compiler flags: all preprocessor defines plus the nvcc options
OFFLOAD_FLAGS := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# The C flags receive all preprocessor defines collected so far;
# the C++ flags are the completed C flags plus the language standard.
CFLAGS += $(DFLAGS)
CXXFLAGS := $(CFLAGS) -std=c++11

# Fortran flags: start from the C flags and append the Fortran-specific
# options.
FCFLAGS        := $(CFLAGS)
# -fallow-argument-mismatch is only added when the major version reported by
# "gcc -dumpversion" is greater than 9. The probe runs in a single shell
# invocation (one fork per parse instead of two) and discards stderr, so a
# missing gcc simply leaves the flag off instead of printing shell errors.
# "$$" passes a literal "$" to the shell for the command substitution.
ifeq ($(shell [ "$$(gcc -dumpversion 2>/dev/null | cut -d. -f1)" -gt 9 ] 2>/dev/null && echo yes), yes)
   FCFLAGS        += -fallow-argument-mismatch
endif
FCFLAGS        += -fbacktrace
FCFLAGS        += -ffree-form
FCFLAGS        += -ffree-line-length-none
FCFLAGS        += -fno-omit-frame-pointer
FCFLAGS        += -std=f2008

# Linker flags: the Fortran flags, extended by the CUDA library directory
# (search path and rpath) whenever CUDA_HOME is set.
ifeq ($(CUDA_HOME),)
  LDFLAGS := $(FCFLAGS)
else
  CUDA_LIB := $(CUDA_HOME)/lib64
  LDFLAGS := $(FCFLAGS) -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA and system libraries, appended after all static libraries
LIBS += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt \
        -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/01
 job id: 49265212
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/02
 job id: 49265213
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/03
 job id: 49265214
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/04
 job id: 49265215
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/05
 job id: 49265216
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/06
 job id: 49265217
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/07
 job id: 49265218
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/08
 job id: 49265219
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/09
 job id: 49265220
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/10
 job id: 49265221
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/11
 job id: 49265222
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/12
 job id: 49265223
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/13
 job id: 49265225
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/14
 job id: 49265226
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/15
 job id: 49265227
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/16
 job id: 49265228
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/17
 job id: 49265229
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/18
 job id: 49265230
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/19
 job id: 49265231
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/20
 job id: 49265232
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/21
 job id: 49265233
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/22
 job id: 49265234
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/23
 job id: 49265235
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/24
 job id: 49265236
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/25
 job id: 49265237
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/26
 job id: 49265238
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: 512 H2O (4 NVE MD steps on 64 nodes)
 input file: benchmarks/QS/00512_H2O/H2O-512_md.inp
 required files: []
 output file: result.log
 # nodes = 64
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/27
 job id: 49265240
 --- Point ---
 name: 601
 plot: h2o_512_md
 regex: CP2K  
 label: (64n/12r/1t)
 --- Point ---
 name: 602
 plot: h2o_512_md_mem
 regex: Estimated peak process memory 
 label: (64n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md_mem", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          424                      8.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.033  136.191  136.192
 farming_run                          1  2.0  135.419  135.421  136.159  136.162
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.492197E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              230                1134128.
 MP_Allreduce          571                1938539.
 MP_Sync                25
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split          10
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.187  115.899  116.052
 qs_energies                          1  2.0    0.000    0.000  115.600  115.664
 mp2_main                             1  3.0    0.000    0.000  113.360  113.425
 mp2_gpw_main                         1  4.0    0.226    0.232  112.264  112.328
 mp2_ri_gpw_compute_in                1  5.0    0.171    0.173   92.924   93.295
 mp2_ri_gpw_compute_in_loop           1  6.0    0.004    0.004   54.620   54.992
 mp2_eri_3c_integrate_gpw           272  7.0    0.152    0.164   41.001   46.065
 get_2c_integrals                     1  6.0    0.008    0.009   37.218   38.132
 integrate_v_rspace                 273  8.0    0.438    0.453   24.631   29.462
 pw_transfer                       6555 10.6    0.373    0.391   26.761   27.432
 fft_wrap_pw1pw2                   5465 11.4    0.045    0.048   25.392   26.035
 grid_integrate_task_list           273  9.0   20.540   25.828   20.540   25.828
 fft_wrap_pw1pw2_100               2178 12.4    1.119    1.397   22.953   23.582
 rpa_ri_compute_en                    1  5.0    0.027    0.030   19.023   19.289
 compute_2c_integrals                 1  7.0    0.003    0.006   19.144   19.144
 cp_fm_cholesky_decompose            12  8.2   18.080   19.041   18.080   19.041
 compute_2c_integrals_loop_lm         1  8.0    0.003    0.004   18.575   18.844
 mp2_eri_2c_integrate_gpw             1  9.0    2.388    2.432   18.572   18.842
 cholesky_decomp                      1  7.0    0.000    0.000   16.916   17.871
 fft3d_s                           5443 13.4   16.089   16.365   16.112   16.387
 ao_to_mo_and_store_B_mult_1        272  7.0   10.779   15.331   10.779   15.331
 calculate_wavefunction             272  8.0    5.374    5.555   12.281   12.951
 rpa_num_int                          1  6.0    0.005    0.077   10.784   10.860
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.734   10.749
 calc_mat_Q                           8  8.0    0.000    0.000    9.536    9.635
 contract_S_to_Q                      8  9.0    0.000    0.000    8.958    9.058
 calc_potential_gpw                 544  9.5    0.005    0.005    8.243    8.669
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.557    8.655
 parallel_gemm_fm_cosma              14 10.1    8.557    8.655    8.557    8.655
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.002    8.065    8.306
 potential_pw2rs                    545 10.0    0.107    0.108    7.461    8.104
 create_integ_mat                     1  6.0    0.014    0.027    7.766    7.775
 collocate_single_gaussian          272 10.0    0.039    0.042    7.311    7.502
 array2fm                             1  7.0    0.000    0.000    6.694    7.161
 pw_scatter_s                      2720 13.7    4.353    4.550    4.353    4.550
 pw_gather_s                       2722 13.2    3.432    3.839    3.432    3.839
 array2fm_buffer_send                 1  8.0    2.968    3.151    2.968    3.151
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=112.328271, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2812.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          424                      9.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.037  424.681  424.682
 farming_run                          1  2.0  423.288  423.293  424.646  424.648
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827141120       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788822       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.232835E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              757                 478553.
 MP_Allreduce         2021                  21391.
 MP_Sync                37
 MP_Alltoall            77
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.043  217.982  217.983
 qs_energies                          1  2.0    0.000    0.000  217.565  217.580
 scf_env_do_scf                       1  3.0    0.000    0.000  115.237  115.237
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  114.302  114.308
 rebuild_ks_matrix                    4  6.0    0.000    0.000  114.301  114.307
 qs_ks_build_kohn_sham_matrix         4  7.0    0.055    0.061  114.301  114.307
 hfx_ks_matrix                        4  8.0    0.001    0.001  113.946  113.951
 integrate_four_center                4  9.0    0.153    0.465  113.946  113.950
 integrate_four_center_main           4 10.0    0.128    0.507  101.841  105.187
 integrate_four_center_bin          266 11.0  101.713  105.135  101.713  105.135
 mp2_main                             1  3.0    0.000    0.000  102.023  102.039
 mp2_gpw_main                         1  4.0    0.048    0.076  101.103  101.122
 init_scf_loop                        1  4.0    0.000    0.000   96.985   96.985
 mp2_ri_gpw_compute_in                1  5.0    0.063    0.064   73.962   75.239
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.002   53.894   55.170
 mp2_eri_3c_integrate_gpw            91  7.0    0.142    0.163   41.310   46.288
 integrate_v_rspace                  95  8.0    0.397    0.558   27.794   32.717
 pw_transfer                       2240 10.6    0.144    0.167   29.457   29.798
 mp2_ri_gpw_compute_en                1  5.0    0.059    0.074   26.977   29.344
 ao_to_mo_and_store_B_mult_1         91  7.0   10.913   29.220   10.913   29.220
 fft_wrap_pw1pw2                   1868 11.4    0.018    0.021   28.481   28.816
 grid_integrate_task_list            95  9.0   23.215   28.293   23.215   28.293
 fft_wrap_pw1pw2_100                730 12.4    1.263    1.446   26.239   26.579
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.826    1.886   25.044   25.053
 get_2c_integrals                     1  6.0    0.000    0.000   19.967   20.005
 compute_2c_integrals                 1  7.0    0.002    0.003   18.944   18.950
 fft3d_s                           1823 13.4   18.485   18.860   18.499   18.875
 compute_2c_integrals_loop_lm         1  8.0    0.001    0.001   18.567   18.818
 mp2_eri_2c_integrate_gpw             1  9.0    1.740    1.839   18.566   18.817
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.001   18.250   18.250
 calculate_wavefunction              91  8.0    2.016    2.051    9.619    9.838
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.556    0.582    8.812    9.280
 mp_sync                             37 10.5    4.062    9.149    4.062    9.149
 potential_pw2rs                    186 10.0    0.033    0.035    8.367    8.899
 local_gemm                         172  8.0    8.255    8.720    8.255    8.720
 mp2_ri_gpw_compute_en_comm          22  7.0    0.497    0.517    8.023    8.645
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.001    8.107    8.454
 calc_potential_gpw                 182  9.5    0.002    0.002    7.908    8.168
 collocate_single_gaussian           91 10.0    0.017    0.022    7.729    8.023
 integrate_four_center_load           4 10.0    0.000    0.000    6.793    6.797
 hfx_load_balance                     1 11.0    0.000    0.000    6.793    6.797
 mp_sendrecv_dm3                   2068  8.0    6.064    6.679    6.064    6.679
 mp2_ri_gpw_compute_en_ener         172  7.0    6.338    6.397    6.338    6.397
 pw_gather_s                        912 13.2    4.463    5.011    4.463    5.011
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=101.100368, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1493.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             452.390912E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62379.
 MP_Allreduce        10329                    270.
 MP_Sync               530
 MP_Alltoall          2083                 592243.
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.020    0.074   55.295   55.299
 qs_mol_dyn_low                       1  2.0    0.003    0.005   54.849   54.856
 qs_forces                           11  3.9    0.003    0.010   54.739   54.741
 qs_energies                         11  4.9    0.003    0.012   53.250   53.265
 scf_env_do_scf                      11  5.9    0.001    0.002   45.357   45.358
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   42.835   42.836
 dbcsr_multiply_generic            2286 12.5    0.093    0.097   33.718   34.188
 qs_scf_new_mos                     108  7.5    0.000    0.001   32.861   33.103
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   32.860   33.103
 ot_scf_mini                        108  9.5    0.002    0.002   31.261   31.443
 multiply_cannon                   2286 13.5    0.255    0.324   26.594   28.151
 multiply_cannon_loop              2286 14.5    1.821    1.926   25.809   27.461
 velocity_verlet                     10  3.0    0.001    0.002   26.497   26.499
 ot_mini                            108 10.5    0.002    0.009   18.860   19.115
 qs_ot_get_derivative               108 11.5    0.001    0.001   15.912   16.103
 mp_waitall_1                    245248 16.5    8.034   13.904    8.034   13.904
 multiply_cannon_metrocomm3       54864 15.5    0.073    0.079    5.785   12.503
 multiply_cannon_multrec          54864 15.5    3.681    5.872    8.291   11.824
 qs_ot_get_p                        119 10.4    0.001    0.001    7.832    8.129
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.862    8.030
 qs_ks_build_kohn_sham_matrix       119  9.3    0.019    0.079    7.862    8.030
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.930    7.078
 mp_sum_l                          7287 12.8    5.146    6.886    5.146    6.886
 init_scf_run                        11  5.9    0.000    0.001    6.572    6.572
 scf_env_initial_rho_setup           11  6.9    0.000    0.004    6.571    6.572
 dbcsr_mm_accdrv_process          76910 16.1    1.841    2.889    4.522    6.323
 multiply_cannon_sync_h2d         54864 15.5    5.171    6.126    5.171    6.126
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    5.617    6.095
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    5.104    5.134
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    4.949    5.065
 sum_up_and_integrate               119 10.3    0.001    0.003    4.457    4.464
 integrate_v_rspace                 119 11.3    0.002    0.004    4.446    4.453
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    4.400    4.400
 qs_rho_update_rho_low              119  7.7    0.001    0.008    4.166    4.314
 calculate_rho_elec                 119  8.7    0.012    0.022    4.164    4.314
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    4.194    4.194
 cp_fm_redistribute_end              50 14.0    2.139    4.167    2.147    4.170
 cp_fm_diag_elpa_base                50 14.0    2.017    4.055    2.022    4.063
 calculate_first_density_matrix       1  7.0    0.001    0.004    3.910    3.913
 calculate_dm_sparse                119  9.5    0.000    0.000    3.367    3.470
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.324    3.372
 jit_kernel_multiply                 13 15.8    2.617    3.347    2.617    3.347
 apply_preconditioner_dbcsr         119 12.6    0.000    0.001    2.899    3.094
 apply_single                       119 13.6    0.000    0.000    2.899    3.094
 acc_transpose_blocks             54864 15.5    0.231    0.260    2.251    2.805
 ot_diis_step                       108 11.5    0.006    0.008    2.688    2.690
 wfi_extrapolate                     11  7.9    0.048    0.383    2.566    2.567
 multiply_cannon_metrocomm1       54864 15.5    0.056    0.062    1.578    2.504
 init_scf_loop                       11  6.9    0.006    0.047    2.503    2.504
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.350    2.389
 density_rs2pw                      119  9.7    0.004    0.004    2.196    2.283
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.180    2.230
 grid_integrate_task_list           119 12.3    2.015    2.101    2.015    2.101
 mp_sum_d                          4135 12.0    1.406    2.058    1.406    2.058
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.934    1.962
 potential_pw2rs                    119 12.3    0.004    0.004    1.831    1.844
 pw_transfer                       1439 11.6    0.051    0.057    1.684    1.755
 make_m2s                          4572 13.5    0.054    0.056    1.673    1.716
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.007    1.609    1.682
 make_images                       4572 14.5    0.135    0.140    1.589    1.633
 mp_alltoall_d11v                  2130 13.8    1.315    1.537    1.315    1.537
 transfer_rs2pw                     487 10.6    0.005    0.006    1.415    1.496
 mp_waitany                       12084 13.8    1.274    1.450    1.274    1.450
 acc_transpose_blocks_sync       164592 16.5    1.200    1.438    1.200    1.438
 grid_collocate_task_list           119  9.7    1.349    1.413    1.349    1.413
 fft3d_ps                          1201 14.6    0.370    0.476    1.324    1.387
 transfer_pw2rs                     487 13.2    0.006    0.007    1.336    1.343
 fft_wrap_pw1pw2_140                487 13.2    0.141    0.156    1.246    1.320
 dbcsr_complete_redistribute        329 12.2    0.363    0.580    1.238    1.303
 mp_alltoall_i22                    627 13.8    1.000    1.285    1.000    1.285
 dbcsr_dot_sd                      1205 11.9    0.049    0.060    0.835    1.267
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.085    1.151
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=55.299000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=431.181818, yerr=1.266217
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             488.497152E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10306                    303.
 MP_Sync                54
 MP_Alltoall          2060                1122019.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.007    0.024   40.132   40.133
 qs_mol_dyn_low                       1  2.0    0.003    0.003   39.775   39.782
 qs_forces                           11  3.9    0.002    0.003   39.560   39.561
 qs_energies                         11  4.9    0.002    0.002   37.880   37.883
 scf_env_do_scf                      11  5.9    0.000    0.001   32.585   32.586
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   30.100   30.100
 dbcsr_multiply_generic            2286 12.5    0.101    0.104   22.316   22.671
 qs_scf_new_mos                     108  7.5    0.001    0.001   21.146   21.377
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   21.146   21.377
 ot_scf_mini                        108  9.5    0.002    0.003   20.206   20.371
 velocity_verlet                     10  3.0    0.001    0.001   18.980   18.984
 multiply_cannon                   2286 13.5    0.211    0.220   17.070   18.723
 multiply_cannon_loop              2286 14.5    1.196    1.257   15.864   17.501
 ot_mini                            108 10.5    0.001    0.001   12.428   12.660
 mp_waitall_1                    200699 16.5    5.681   10.902    5.681   10.902
 qs_ot_get_derivative               108 11.5    0.001    0.001   10.029   10.198
 multiply_cannon_metrocomm3       27432 15.5    0.071    0.073    4.151    9.540
 multiply_cannon_multrec          27432 15.5    1.824    4.070    6.266    9.230
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.953    7.091
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.953    7.090
 dbcsr_mm_accdrv_process          47894 16.0    3.548    5.841    4.360    6.901
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.135    6.260
 qs_ot_get_p                        119 10.4    0.001    0.001    4.824    5.040
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.842    4.667
 mp_sum_l                          7287 12.8    2.055    4.160    2.055    4.160
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.005    4.085
 apply_single                       119 13.6    0.000    0.000    3.005    4.085
 init_scf_run                        11  5.9    0.000    0.001    4.083    4.084
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    4.083    4.083
 sum_up_and_integrate               119 10.3    0.001    0.001    3.850    3.856
 integrate_v_rspace                 119 11.3    0.002    0.003    3.836    3.841
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.803    3.825
 calculate_rho_elec                 119  8.7    0.021    0.024    3.802    3.824
 qs_ot_p2m_diag                      50 11.0    0.009    0.013    3.122    3.140
 make_m2s                          4572 13.5    0.052    0.054    2.738    3.081
 make_images                       4572 14.5    0.206    0.245    2.649    2.994
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.720    2.720
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.516    2.517
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.379    2.465
 init_scf_loop                       11  6.9    0.000    0.000    2.462    2.462
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.372    2.372
 ot_diis_step                       108 11.5    0.011    0.011    2.351    2.352
 cp_fm_redistribute_end              50 14.0    1.200    2.340    1.205    2.344
 cp_fm_diag_elpa_base                50 14.0    1.105    2.239    1.134    2.280
 multiply_cannon_sync_h2d         27432 15.5    1.693    2.237    1.693    2.237
 density_rs2pw                      119  9.7    0.004    0.004    2.101    2.179
 calculate_dm_sparse                119  9.5    0.000    0.001    2.079    2.155
 pw_transfer                       1439 11.6    0.065    0.068    1.962    1.998
 acc_transpose_blocks             27432 15.5    0.113    0.119    1.616    1.955
 grid_integrate_task_list           119 12.3    1.846    1.951    1.846    1.951
 jit_kernel_multiply                  9 16.4    0.753    1.937    0.753    1.937
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.871    1.909
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.888    1.890
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.683    1.721
 make_images_data                  4572 15.5    0.048    0.054    1.267    1.609
 potential_pw2rs                    119 12.3    0.006    0.007    1.578    1.587
 prepare_preconditioner              11  7.9    0.000    0.000    1.531    1.557
 make_preconditioner                 11  8.9    0.000    0.000    1.531    1.557
 fft3d_ps                          1201 14.6    0.518    0.574    1.507    1.538
 wfi_extrapolate                     11  7.9    0.001    0.001    1.505    1.505
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.433    1.489
 fft_wrap_pw1pw2_140                487 13.2    0.160    0.169    1.442    1.481
 hybrid_alltoall_any               4725 16.4    0.054    0.117    1.118    1.474
 grid_collocate_task_list           119  9.7    1.290    1.347    1.290    1.347
 mp_alltoall_d11v                  2130 13.8    1.252    1.330    1.252    1.330
 transfer_rs2pw                     487 10.6    0.005    0.006    1.232    1.323
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.256    1.264
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.213    1.261
 mp_sum_d                          4135 12.0    0.561    1.050    0.561    1.050
 mp_allgather_i34                  2286 14.5    0.624    1.027    0.624    1.027
 transfer_pw2rs                     487 13.2    0.005    0.005    0.982    0.989
 acc_transpose_blocks_sync        82296 16.5    0.816    0.942    0.816    0.942
 qs_energies_init_hamiltonians       11  5.9    0.000    0.002    0.930    0.931
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.876    0.889
 acc_transpose_blocks_kernels     27432 16.5    0.189    0.277    0.661    0.869
 make_images_sizes                 4572 15.5    0.005    0.005    0.599    0.848
 mp_alltoall_i44                   4572 16.5    0.594    0.844    0.594    0.844
 mp_alltoall_z22v                  1201 16.6    0.748    0.826    0.748    0.826
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=40.133000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=465.090909, yerr=1.504813
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             519.446528E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63490.
 MP_Allreduce        10155                    305.
 MP_Sync                54
 MP_Alltoall          1821                1607811.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.027   33.370   33.370
 qs_mol_dyn_low                       1  2.0    0.003    0.003   33.171   33.181
 qs_forces                           11  3.9    0.003    0.003   32.832   32.837
 qs_energies                         11  4.9    0.002    0.002   31.263   31.268
 scf_env_do_scf                      11  5.9    0.001    0.001   26.518   26.518
 scf_env_do_scf_inner_loop          108  6.5    0.028    0.075   23.748   23.748
 dbcsr_multiply_generic            2286 12.5    0.095    0.097   17.511   17.611
 qs_scf_new_mos                     108  7.5    0.001    0.001   15.936   15.985
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   15.936   15.984
 velocity_verlet                     10  3.0    0.001    0.002   15.564   15.573
 ot_scf_mini                        108  9.5    0.002    0.002   15.154   15.192
 multiply_cannon                   2286 13.5    0.196    0.200   14.157   14.795
 multiply_cannon_loop              2286 14.5    0.859    0.897   13.363   14.071
 ot_mini                            108 10.5    0.001    0.001    9.444    9.485
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.957    7.997
 multiply_cannon_multrec          18288 15.5    1.998    3.066    7.240    7.544
 dbcsr_mm_accdrv_process          38222 16.0    5.066    6.090    5.148    6.156
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.013    6.027
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.013    6.027
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.297    5.309
 qs_ot_get_p                        119 10.4    0.001    0.001    3.778    3.801
 mp_waitall_1                    158411 16.6    2.686    3.733    2.686    3.733
 sum_up_and_integrate               119 10.3    0.001    0.001    3.591    3.598
 init_scf_run                        11  5.9    0.000    0.001    3.587    3.587
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.587    3.587
 integrate_v_rspace                 119 11.3    0.003    0.003    3.578    3.586
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.943    3.539
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.327    3.340
 calculate_rho_elec                 119  8.7    0.031    0.031    3.327    3.339
 init_scf_loop                       11  6.9    0.000    0.000    2.753    2.754
 multiply_cannon_metrocomm3       18288 15.5    0.047    0.049    1.609    2.627
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.348    2.349
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.973    2.343
 apply_single                       119 13.6    0.000    0.000    1.973    2.342
 qs_ot_p2m_diag                      50 11.0    0.012    0.013    2.269    2.279
 calculate_dm_sparse                119  9.5    0.000    0.001    2.033    2.050
 make_m2s                          4572 13.5    0.044    0.046    1.879    2.029
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.969    1.969
 make_images                       4572 14.5    0.192    0.205    1.792    1.941
 density_rs2pw                      119  9.7    0.004    0.004    1.853    1.931
 grid_integrate_task_list           119 12.3    1.800    1.893    1.800    1.893
 pw_transfer                       1439 11.6    0.065    0.068    1.881    1.890
 acc_transpose_blocks             18288 15.5    0.079    0.081    1.786    1.866
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.788    1.797
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.730    1.736
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.721    1.732
 cp_fm_diag_elpa_base                50 14.0    1.699    1.711    1.720    1.731
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.673    1.674
 prepare_preconditioner              11  7.9    0.000    0.000    1.651    1.655
 make_preconditioner                 11  8.9    0.000    0.000    1.651    1.655
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.502    1.587
 mp_sum_l                          7287 12.8    1.089    1.536    1.089    1.536
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.507    1.512
 ot_diis_step                       108 11.5    0.011    0.012    1.473    1.473
 fft_wrap_pw1pw2_140                487 13.2    0.212    0.216    1.413    1.423
 potential_pw2rs                    119 12.3    0.007    0.009    1.375    1.378
 fft3d_ps                          1201 14.6    0.526    0.543    1.357    1.366
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.346    1.350
 grid_collocate_task_list           119  9.7    1.234    1.307    1.234    1.307
 wfi_extrapolate                     11  7.9    0.001    0.001    1.188    1.188
 multiply_cannon_sync_h2d         18288 15.5    1.045    1.176    1.045    1.176
 transfer_rs2pw                     487 10.6    0.005    0.005    0.999    1.085
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.979    0.999
 make_images_data                  4572 15.5    0.047    0.051    0.813    0.989
 acc_transpose_blocks_kernels     18288 16.5    0.218    0.226    0.948    0.958
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    0.938    0.943
 hybrid_alltoall_any               4725 16.4    0.058    0.116    0.701    0.907
 multiply_cannon_metrocomm1       18288 15.5    0.030    0.031    0.301    0.860
 mp_alltoall_d11v                  2130 13.8    0.740    0.822    0.740    0.822
 mp_sum_d                          4135 12.0    0.535    0.813    0.535    0.813
 acc_transpose_blocks_sync        54864 16.5    0.739    0.811    0.739    0.811
 transfer_pw2rs                     487 13.2    0.004    0.004    0.807    0.810
 arnoldi_extremal                   119 11.4    0.001    0.002    0.760    0.790
 arnoldi_normal_ev                  119 12.4    0.135    0.358    0.758    0.788
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.785    0.787
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    0.676    0.766
 jit_kernel_multiply                  4 15.0    0.023    0.746    0.023    0.746
 mp_alltoall_z22v                  1201 16.6    0.672    0.738    0.672    0.738
 jit_kernel_transpose                 5 15.6    0.730    0.735    0.730    0.735
 cp_fm_cholesky_invert               11 10.9    0.678    0.681    0.678    0.681
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=33.370000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=495.090909, yerr=0.792527
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             558.026752E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63489.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.029   37.001   37.003
 qs_mol_dyn_low                       1  2.0    0.003    0.004   36.799   36.839
 qs_forces                           11  3.9    0.002    0.003   36.705   36.706
 qs_energies                         11  4.9    0.002    0.002   35.003   35.011
 scf_env_do_scf                      11  5.9    0.000    0.001   29.887   29.888
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   26.462   26.462
 dbcsr_multiply_generic            2286 12.5    0.099    0.102   20.327   20.500
 velocity_verlet                     10  3.0    0.003    0.020   18.557   18.560
 qs_scf_new_mos                     108  7.5    0.001    0.001   18.433   18.482
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   18.432   18.481
 ot_scf_mini                        108  9.5    0.002    0.003   17.385   17.430
 multiply_cannon                   2286 13.5    0.220    0.231   16.634   17.051
 multiply_cannon_loop              2286 14.5    1.517    1.610   15.658   16.033
 ot_mini                            108 10.5    0.001    0.001   10.854   10.921
 multiply_cannon_multrec          27432 15.5    2.513    3.331    9.129    9.565
 qs_ot_get_derivative               108 11.5    0.001    0.001    8.938    8.988
 dbcsr_mm_accdrv_process          47916 15.9    5.957    7.646    6.515    8.060
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.303    6.364
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.303    6.363
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.584    5.637
 qs_ot_get_p                        119 10.4    0.001    0.001    3.658    3.725
 init_scf_run                        11  5.9    0.000    0.001    3.652    3.653
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.652    3.652
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.289    3.639
 sum_up_and_integrate               119 10.3    0.001    0.001    3.484    3.491
 integrate_v_rspace                 119 11.3    0.003    0.003    3.473    3.479
 init_scf_loop                       11  6.9    0.000    0.000    3.405    3.405
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.335    3.370
 calculate_rho_elec                 119  8.7    0.040    0.046    3.335    3.369
 mp_waitall_1                    137007 16.6    2.134    2.793    2.134    2.793
 acc_transpose_blocks             27432 15.5    0.119    0.124    2.422    2.698
 prepare_preconditioner              11  7.9    0.000    0.000    2.568    2.577
 make_preconditioner                 11  8.9    0.000    0.000    2.568    2.577
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.172    2.503
 make_m2s                          4572 13.5    0.055    0.057    2.301    2.428
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.341    2.366
 make_images                       4572 14.5    0.274    0.337    2.191    2.317
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.093    2.262
 apply_single                       119 13.6    0.000    0.000    2.093    2.262
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    2.245    2.257
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.253    2.255
 calculate_dm_sparse                119  9.5    0.000    0.000    2.179    2.234
 pw_transfer                       1439 11.6    0.065    0.068    1.924    1.958
 grid_integrate_task_list           119 12.3    1.843    1.903    1.843    1.903
 density_rs2pw                      119  9.7    0.004    0.004    1.793    1.887
 ot_diis_step                       108 11.5    0.012    0.013    1.873    1.874
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.832    1.869
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.845    1.846
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.826    1.827
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.633    1.646
 multiply_cannon_metrocomm3       27432 15.5    0.040    0.042    1.063    1.621
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.601    1.611
 cp_fm_diag_elpa_base                50 14.0    1.567    1.584    1.599    1.609
 fft_wrap_pw1pw2_140                487 13.2    0.246    0.260    1.507    1.547
 acc_transpose_blocks_sync        82296 16.5    1.400    1.539    1.400    1.539
 fft3d_ps                          1201 14.6    0.553    0.601    1.349    1.373
 wfi_extrapolate                     11  7.9    0.001    0.001    1.346    1.346
 grid_collocate_task_list           119  9.7    1.250    1.345    1.250    1.345
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.305    1.322
 mp_sum_l                          7287 12.8    0.966    1.293    0.966    1.293
 potential_pw2rs                    119 12.3    0.009    0.010    1.283    1.285
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.261    1.274
 cp_fm_upper_to_full                 72 14.2    0.814    1.161    0.814    1.161
 jit_kernel_multiply                  6 16.0    0.492    1.155    0.492    1.155
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.078    1.079
 acc_transpose_blocks_kernels     27432 16.5    0.272    0.281    0.874    1.058
 dbcsr_complete_redistribute        329 12.2    0.125    0.147    0.781    1.054
 transfer_rs2pw                     487 10.6    0.005    0.005    0.896    0.990
 make_images_data                  4572 15.5    0.048    0.051    0.847    0.987
 hybrid_alltoall_any               4725 16.4    0.065    0.154    0.725    0.879
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.799    0.872
 mp_alltoall_d11v                  2130 13.8    0.736    0.862    0.736    0.862
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.585    0.851
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.808    0.814
 jit_kernel_transpose                 5 15.6    0.601    0.788    0.601    0.788
 cp_fm_cholesky_invert               11 10.9    0.768    0.771    0.768    0.771
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=37.003000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=529.545455, yerr=3.340213
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             636.215296E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63488.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.027   28.803   28.824
 qs_mol_dyn_low                       1  2.0    0.003    0.003   28.637   28.657
 qs_forces                           11  3.9    0.002    0.003   28.558   28.581
 qs_energies                         11  4.9    0.002    0.002   26.862   26.885
 scf_env_do_scf                      11  5.9    0.000    0.001   21.911   21.927
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   19.486   19.499
 velocity_verlet                     10  3.0    0.001    0.002   14.593   14.608
 dbcsr_multiply_generic            2286 12.5    0.093    0.098   13.101   13.171
 qs_scf_new_mos                     108  7.5    0.001    0.001   11.755   11.780
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   11.754   11.779
 ot_scf_mini                        108  9.5    0.002    0.002   11.048   11.072
 multiply_cannon                   2286 13.5    0.224    0.230   10.519   11.014
 multiply_cannon_loop              2286 14.5    0.644    0.663    9.593    9.751
 ot_mini                            108 10.5    0.001    0.001    6.403    6.432
 multiply_cannon_multrec           9144 15.5    1.669    1.852    6.126    6.317
 rebuild_ks_matrix                  119  8.3    0.000    0.000    5.708    5.732
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    5.707    5.732
 qs_ot_get_derivative               108 11.5    0.001    0.001    5.116    5.140
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.079    5.100
 dbcsr_mm_accdrv_process          12550 15.8    3.658    4.358    4.346    4.412
 sum_up_and_integrate               119 10.3    0.001    0.001    3.397    3.402
 integrate_v_rspace                 119 11.3    0.003    0.003    3.387    3.392
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.270    3.276
 calculate_rho_elec                 119  8.7    0.060    0.061    3.270    3.276
 init_scf_run                        11  5.9    0.000    0.001    3.240    3.242
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.240    3.242
 qs_ot_get_p                        119 10.4    0.001    0.001    2.770    2.803
 init_scf_loop                       11  6.9    0.000    0.000    2.406    2.409
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.111    2.113
 make_m2s                          4572 13.5    0.034    0.035    1.826    1.992
 pw_transfer                       1439 11.6    0.065    0.067    1.951    1.958
 grid_integrate_task_list           119 12.3    1.864    1.930    1.864    1.930
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.898    1.916
 make_images                       4572 14.5    0.271    0.302    1.737    1.902
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.858    1.866
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    1.797    1.799
 density_rs2pw                      119  9.7    0.003    0.003    1.672    1.775
 calculate_dm_sparse                119  9.5    0.000    0.000    1.742    1.763
 mp_waitall_1                    115863 16.7    1.275    1.762    1.275    1.762
 prepare_preconditioner              11  7.9    0.000    0.000    1.685    1.690
 make_preconditioner                 11  8.9    0.000    0.000    1.685    1.690
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.576    1.602
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.564    1.566
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.557    1.558
 fft_wrap_pw1pw2_140                487 13.2    0.323    0.334    1.533    1.544
 acc_transpose_blocks              9144 15.5    0.041    0.042    1.425    1.450
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.404    1.412
 grid_collocate_task_list           119  9.7    1.296    1.390    1.296    1.390
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.332    1.345
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.296    1.303
 cp_fm_diag_elpa_base                50 14.0    1.268    1.285    1.294    1.302
 fft3d_ps                          1201 14.6    0.558    0.570    1.271    1.282
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.274    1.278
 ot_diis_step                       108 11.5    0.013    0.013    1.275    1.276
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.215    1.254
 apply_single                       119 13.6    0.000    0.000    1.215    1.254
 jit_kernel_multiply                  6 15.5    0.648    1.225    0.648    1.225
 potential_pw2rs                    119 12.3    0.010    0.011    1.211    1.214
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.132    1.139
 wfi_extrapolate                     11  7.9    0.001    0.001    1.078    1.079
 hybrid_alltoall_any               4725 16.4    0.065    0.175    0.744    0.976
 make_images_data                  4572 15.5    0.042    0.045    0.767    0.953
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    0.875    0.927
 transfer_rs2pw                     487 10.6    0.004    0.005    0.767    0.859
 cp_fm_cholesky_invert               11 10.9    0.836    0.839    0.836    0.839
 mp_alltoall_d11v                  2130 13.8    0.738    0.818    0.738    0.818
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.810    0.817
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.751    0.754
 qs_env_update_s_mstruct             11  6.9    0.030    0.051    0.707    0.746
 acc_transpose_blocks_sync        27432 16.5    0.723    0.746    0.723    0.746
 mp_allgather_i34                  2286 14.5    0.238    0.661    0.238    0.661
 transfer_pw2rs                     487 13.2    0.003    0.004    0.648    0.650
 acc_transpose_blocks_kernels      9144 16.5    0.118    0.121    0.644    0.646
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.021    0.319    0.646
 mp_alltoall_z22v                  1201 16.6    0.588    0.618    0.588    0.618
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=28.824000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=601.636364, yerr=8.014450
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             769.388544E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63723.
 MP_Allreduce        10154                    429.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.023    0.028   43.608   43.609
 qs_mol_dyn_low                       1  2.0    0.003    0.004   43.123   43.131
 qs_forces                           11  3.9    0.002    0.002   42.991   42.991
 qs_energies                         11  4.9    0.002    0.002   41.000   41.003
 scf_env_do_scf                      11  5.9    0.001    0.001   34.757   34.757
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   26.922   26.923
 velocity_verlet                     10  3.0    0.002    0.002   24.203   24.217
 dbcsr_multiply_generic            2286 12.5    0.108    0.112   19.685   19.840
 qs_scf_new_mos                     108  7.5    0.001    0.001   17.524   17.626
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   17.523   17.626
 multiply_cannon                   2286 13.5    0.300    0.309   15.677   16.530
 ot_scf_mini                        108  9.5    0.002    0.002   16.340   16.445
 multiply_cannon_loop              2286 14.5    0.860    0.885   14.379   15.268
 ot_mini                            108 10.5    0.001    0.001    9.959   10.076
 multiply_cannon_multrec           9144 15.5    3.409    4.729    9.040    9.169
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.876    7.982
 init_scf_loop                       11  6.9    0.000    0.000    7.806    7.807
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.086    7.231
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    7.085    7.230
 prepare_preconditioner              11  7.9    0.000    0.000    6.851    6.866
 make_preconditioner                 11  8.9    0.000    0.000    6.851    6.866
 dbcsr_mm_accdrv_process          12550 15.8    4.698    6.620    5.491    6.761
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.433    6.744
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.395    6.527
 cp_fm_upper_to_full                 72 14.2    3.163    4.521    3.163    4.521
 init_scf_run                        11  5.9    0.000    0.001    4.169    4.169
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    4.169    4.169
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.082    4.136
 calculate_rho_elec                 119  8.7    0.118    0.121    4.082    4.136
 sum_up_and_integrate               119 10.3    0.001    0.001    3.772    3.778
 integrate_v_rspace                 119 11.3    0.003    0.004    3.761    3.768
 mp_waitall_1                     94719 16.7    2.534    3.650    2.534    3.650
 qs_ot_get_p                        119 10.4    0.001    0.001    3.408    3.550
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.969    3.371
 dbcsr_complete_redistribute        329 12.2    0.286    0.292    1.964    2.799
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.632    2.688
 make_m2s                          4572 13.5    0.038    0.038    2.428    2.601
 pw_transfer                       1439 11.6    0.068    0.069    2.584    2.589
 calculate_dm_sparse                119  9.5    0.000    0.000    2.521    2.572
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.235    2.543
 apply_single                       119 13.6    0.000    0.000    2.235    2.543
 fft_wrap_pw1pw2                   1201 12.6    0.009    0.009    2.486    2.491
 make_images                       4572 14.5    0.353    0.386    2.306    2.480
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.653    2.472
 multiply_cannon_metrocomm3        9144 15.5    0.021    0.021    1.476    2.372
 mp_alltoall_i22                    627 13.8    1.423    2.237    1.423    2.237
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.412    2.226
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.085    2.137
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.124    2.124
 grid_integrate_task_list           119 12.3    2.077    2.113    2.077    2.113
 fft_wrap_pw1pw2_140                487 13.2    0.576    0.579    2.099    2.106
 density_rs2pw                      119  9.7    0.003    0.003    2.062    2.081
 ot_diis_step                       108 11.5    0.014    0.014    2.059    2.059
 qs_ot_p2m_diag                      50 11.0    0.043    0.044    2.016    2.018
 acc_transpose_blocks              9144 15.5    0.043    0.044    1.968    2.005
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.833    1.843
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.789    1.789
 mp_sum_l                          7287 12.8    1.034    1.746    1.034    1.746
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.662    1.663
 fft3d_ps                          1201 14.6    0.594    0.607    1.582    1.587
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.504    1.555
 grid_collocate_task_list           119  9.7    1.520    1.551    1.520    1.551
 cp_fm_cholesky_invert               11 10.9    1.499    1.502    1.499    1.502
 wfi_extrapolate                     11  7.9    0.001    0.001    1.421    1.421
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.377    1.378
 hybrid_alltoall_any               4725 16.4    0.090    0.151    1.109    1.377
 cp_fm_diag_elpa_base                50 14.0    1.233    1.285    1.376    1.376
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.323    1.341
 mp_alltoall_d11v                  2130 13.8    1.239    1.298    1.239    1.298
 make_images_data                  4572 15.5    0.046    0.049    1.054    1.285
 potential_pw2rs                    119 12.3    0.014    0.015    1.271    1.273
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    1.119    1.136
 acc_transpose_blocks_sync        27432 16.5    1.098    1.131    1.098    1.131
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    0.997    1.038
 jit_kernel_multiply                  5 15.4    0.764    0.959    0.764    0.959
 qs_create_task_list                 11  7.9    0.008    0.008    0.947    0.958
 generate_qs_task_list               11  8.9    0.367    0.386    0.940    0.951
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.902    0.916
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=43.609000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=725.000000, yerr=12.277104
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             501.538816E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66212.
 MP_Allreduce         9776                    488.
 MP_Sync                52
 MP_Alltoall          1938                1383689.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.009    0.029   84.191   84.193
 qs_mol_dyn_low                       1  2.0    0.003    0.004   83.839   83.850
 qs_forces                           11  3.9    0.003    0.004   83.752   83.753
 qs_energies                         11  4.9    0.002    0.003   80.816   80.839
 scf_env_do_scf                      11  5.9    0.007    0.056   71.454   71.457
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   65.443   65.443
 dbcsr_multiply_generic            2055 12.4    0.106    0.108   51.419   51.707
 qs_scf_new_mos                      99  7.5    0.000    0.001   48.025   48.137
 qs_scf_loop_do_ot                   99  8.5    0.000    0.001   48.024   48.136
 ot_scf_mini                         99  9.5    0.002    0.002   45.633   45.736
 velocity_verlet                     10  3.0    0.001    0.001   44.611   44.611
 multiply_cannon                   2055 13.4    0.187    0.194   42.675   43.618
 multiply_cannon_loop              2055 14.4    1.781    1.828   41.639   42.643
 ot_mini                             99 10.5    0.001    0.001   27.008   27.108
 qs_ot_get_derivative                99 11.5    0.001    0.001   20.189   20.318
 multiply_cannon_multrec          49320 15.4   11.349   12.122   17.433   18.064
 rebuild_ks_matrix                  110  8.3    0.000    0.001   14.845   14.992
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.012   14.844   14.992
 qs_ks_update_qs_env                110  7.6    0.001    0.001   13.003   13.132
 mp_waitall_1                    220248 16.4   11.063   12.264   11.063   12.264
 qs_ot_get_p                        110 10.4    0.001    0.001   10.128   10.234
 multiply_cannon_sync_h2d         49320 15.4    9.509   10.095    9.509   10.095
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    7.939    8.467
 multiply_cannon_metrocomm3       49320 15.4    0.082    0.085    6.467    7.720
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.226    7.710
 apply_single                       110 13.6    0.000    0.001    7.225    7.710
 sum_up_and_integrate               110 10.3    0.002    0.003    7.287    7.341
 integrate_v_rspace                 110 11.3    0.003    0.003    7.262    7.322
 init_scf_run                        11  5.9    0.000    0.001    6.969    6.969
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    6.968    6.969
 qs_ot_p2m_diag                      48 11.0    0.012    0.019    6.791    6.811
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.574    6.733
 calculate_rho_elec                 110  8.6    0.020    0.024    6.573    6.732
 ot_diis_step                        99 11.5    0.006    0.006    6.586    6.587
 dbcsr_mm_accdrv_process          87628 16.1    3.062    3.169    5.955    6.259
 cp_dbcsr_syevd                      48 12.0    0.002    0.003    6.004    6.004
 init_scf_loop                       11  6.9    0.000    0.000    5.972    5.980
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    5.386    5.413
 cp_fm_diag_elpa_base                48 14.0    5.370    5.398    5.383    5.411
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    5.318    5.374
 mp_sum_l                          6594 12.7    4.155    4.937    4.155    4.937
 make_m2s                          4110 13.4    0.061    0.065    4.136    4.255
 wfi_extrapolate                     11  7.9    0.001    0.001    4.185    4.185
 make_images                       4110 14.4    0.179    0.191    4.038    4.160
 calculate_dm_sparse                110  9.5    0.001    0.001    3.808    3.903
 density_rs2pw                      110  9.6    0.004    0.005    3.567    3.812
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.662    3.667
 prepare_preconditioner              11  7.9    0.000    0.000    3.612    3.635
 make_preconditioner                 11  8.9    0.000    0.000    3.612    3.635
 multiply_cannon_metrocomm1       49320 15.4    0.064    0.067    2.574    3.571
 pw_transfer                       1331 11.6    0.055    0.066    3.344    3.464
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.400    3.457
 grid_integrate_task_list           110 12.3    3.259    3.410    3.259    3.410
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.256    3.379
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.232    3.313
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    3.212    3.253
 potential_pw2rs                    110 12.3    0.005    0.006    2.829    2.893
 fft_wrap_pw1pw2_140                451 13.1    0.389    0.427    2.729    2.867
 mp_alltoall_d11v                  2046 13.8    2.213    2.792    2.213    2.792
 jit_kernel_multiply                 13 15.9    2.612    2.732    2.612    2.732
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.679    2.686
 acc_transpose_blocks             49320 15.4    0.214    0.222    2.565    2.662
 fft3d_ps                          1111 14.6    0.792    0.883    2.552    2.646
 grid_collocate_task_list           110  9.6    2.154    2.279    2.154    2.279
 transfer_rs2pw                     451 10.6    0.005    0.006    2.036    2.267
 mp_waitany                       14300 13.8    1.864    2.155    1.864    2.155
 cp_fm_cholesky_invert               11 10.9    2.015    2.020    2.015    2.020
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.976    1.995
 make_images_data                  4110 15.4    0.043    0.048    1.841    1.982
 mp_sum_d                          3889 11.9    1.401    1.903    1.401    1.903
 transfer_pw2rs                     451 13.1    0.006    0.007    1.865    1.884
 hybrid_alltoall_any               4261 16.3    0.085    0.486    1.604    1.849
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.707    1.733
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=84.193000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=476.363636, yerr=2.672171
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             592.056320E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66430.
 MP_Allreduce         9775                    566.
 MP_Sync                52
 MP_Alltoall          1717                1686044.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.045   72.078   72.079
 qs_mol_dyn_low                       1  2.0    0.003    0.004   71.367   71.392
 qs_forces                           11  3.9    0.003    0.004   71.086   71.087
 qs_energies                         11  4.9    0.002    0.003   67.429   67.434
 scf_env_do_scf                      11  5.9    0.000    0.001   57.946   57.950
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   50.147   50.147
 dbcsr_multiply_generic            2055 12.4    0.117    0.123   38.780   39.003
 velocity_verlet                     10  3.0    0.001    0.002   37.139   37.144
 qs_scf_new_mos                      99  7.5    0.001    0.001   34.029   34.173
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   34.029   34.173
 multiply_cannon                   2055 13.4    0.229    0.249   31.837   33.025
 ot_scf_mini                         99  9.5    0.003    0.003   32.339   32.490
 multiply_cannon_loop              2055 14.4    1.168    1.196   30.604   31.712
 ot_mini                             99 10.5    0.001    0.001   18.761   18.919
 multiply_cannon_multrec          24660 15.4    6.993    8.679   14.086   15.777
 rebuild_ks_matrix                  110  8.3    0.000    0.000   13.693   13.868
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   13.692   13.868
 qs_ot_get_derivative                99 11.5    0.001    0.001   12.927   13.078
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.044   12.199
 mp_waitall_1                    176588 16.5    7.919   10.658    7.919   10.658
 multiply_cannon_metrocomm3       24660 15.4    0.072    0.074    5.307    8.738
 multiply_cannon_sync_h2d         24660 15.4    6.386    7.782    6.386    7.782
 init_scf_loop                       11  6.9    0.000    0.000    7.762    7.763
 dbcsr_mm_accdrv_process          52282 16.1    5.588    6.392    6.923    7.270
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.484    7.118
 apply_single                       110 13.6    0.000    0.001    6.483    7.118
 qs_ot_get_p                        110 10.4    0.001    0.001    6.857    7.037
 init_scf_run                        11  5.9    0.000    0.001    7.026    7.026
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    7.025    7.026
 sum_up_and_integrate               110 10.3    0.001    0.003    6.490    6.501
 integrate_v_rspace                 110 11.3    0.002    0.003    6.463    6.474
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.047    6.056
 calculate_rho_elec                 110  8.6    0.039    0.047    6.047    6.056
 ot_diis_step                        99 11.5    0.010    0.011    5.784    5.784
 prepare_preconditioner              11  7.9    0.000    0.000    5.720    5.744
 make_preconditioner                 11  8.9    0.000    0.000    5.720    5.744
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    4.909    5.673
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.312    5.464
 qs_ot_p2m_diag                      48 11.0    0.029    0.044    4.863    4.883
 make_m2s                          4110 13.4    0.057    0.060    4.445    4.788
 make_images                       4110 14.4    0.409    0.465    4.335    4.674
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    4.387    4.387
 pw_transfer                       1331 11.6    0.066    0.074    3.696    3.848
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.769    3.783
 cp_fm_diag_elpa_base                48 14.0    3.720    3.734    3.766    3.780
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.009    3.589    3.740
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.684    3.686
 wfi_extrapolate                     11  7.9    0.001    0.001    3.634    3.634
 density_rs2pw                      110  9.6    0.004    0.005    3.294    3.475
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.347    3.429
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.307    3.362
 grid_integrate_task_list           110 12.3    3.154    3.330    3.154    3.330
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.285    3.286
 fft_wrap_pw1pw2_140                451 13.1    0.460    0.476    3.005    3.158
 calculate_dm_sparse                110  9.5    0.001    0.001    3.034    3.065
 fft3d_ps                          1111 14.6    1.113    1.344    2.687    2.852
 cp_fm_cholesky_invert               11 10.9    2.758    2.766    2.758    2.766
 make_images_data                  4110 15.4    0.049    0.053    2.390    2.743
 hybrid_alltoall_any               4261 16.3    0.107    0.455    2.049    2.654
 mp_sum_l                          6594 12.7    1.894    2.652    1.894    2.652
 potential_pw2rs                    110 12.3    0.008    0.009    2.361    2.371
 acc_transpose_blocks             24660 15.4    0.114    0.118    2.246    2.366
 grid_collocate_task_list           110  9.6    2.171    2.341    2.171    2.341
 mp_alltoall_d11v                  2046 13.8    1.802    2.211    1.802    2.211
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    2.019    2.035
 jit_kernel_multiply                 10 16.2    0.972    1.971    0.972    1.971
 qs_energies_init_hamiltonians       11  5.9    0.001    0.003    1.917    1.918
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.810    1.827
 transfer_rs2pw                     451 10.6    0.006    0.007    1.572    1.801
 build_core_hamiltonian_matrix_      11  4.9    0.039    0.308    1.423    1.751
 multiply_cannon_metrocomm4       22605 15.4    0.077    0.082    0.763    1.721
 mp_sum_d                          3889 11.9    1.241    1.603    1.241    1.603
 mp_allgather_i34                  2055 14.4    0.534    1.592    0.534    1.592
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.581    1.591
 mp_irecv_dv                      57340 16.2    0.637    1.502    0.637    1.502
 mp_waitany                       10164 13.8    1.242    1.465    1.242    1.465
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=72.079000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=560.363636, yerr=6.568142
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             668.819456E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66421.
 MP_Allreduce         9774                    562.
 MP_Sync                52
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.033    0.055   61.760   61.762
 qs_mol_dyn_low                       1  2.0    0.003    0.004   61.477   61.488
 qs_forces                           11  3.9    0.003    0.003   61.235   61.236
 qs_energies                         11  4.9    0.002    0.002   58.036   58.038
 scf_env_do_scf                      11  5.9    0.000    0.001   49.854   49.855
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   41.210   41.211
 velocity_verlet                     10  3.0    0.012    0.023   32.788   32.794
 dbcsr_multiply_generic            2055 12.4    0.106    0.110   30.329   30.546
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.542   26.620
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.542   26.619
 ot_scf_mini                         99  9.5    0.002    0.002   25.291   25.402
 multiply_cannon                   2055 13.4    0.216    0.227   23.269   25.204
 multiply_cannon_loop              2055 14.4    0.819    0.852   22.067   23.847
 ot_mini                             99 10.5    0.001    0.001   15.005   15.113
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.219   12.346
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   12.218   12.346
 multiply_cannon_multrec          16440 15.4    3.764    4.675   10.381   12.249
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.769   10.890
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.613   10.723
 mp_waitall_1                    139946 16.5    7.136   10.470    7.136   10.470
 init_scf_loop                       11  6.9    0.000    0.000    8.609    8.610
 multiply_cannon_metrocomm3       16440 15.4    0.046    0.047    4.508    8.032
 dbcsr_mm_accdrv_process          34862 16.1    5.760    6.800    6.461    7.595
 prepare_preconditioner              11  7.9    0.000    0.000    6.763    6.783
 make_preconditioner                 11  8.9    0.000    0.000    6.763    6.783
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    5.671    6.558
 apply_single                       110 13.6    0.000    0.000    5.670    6.558
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.079    6.441
 sum_up_and_integrate               110 10.3    0.001    0.002    6.271    6.283
 integrate_v_rspace                 110 11.3    0.003    0.003    6.245    6.258
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.787    5.796
 calculate_rho_elec                 110  8.6    0.058    0.059    5.786    5.795
 init_scf_run                        11  5.9    0.000    0.001    5.770    5.771
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    5.770    5.770
 qs_ot_get_p                        110 10.4    0.001    0.001    5.440    5.580
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.666    5.079
 make_m2s                          4110 13.4    0.050    0.051    4.206    4.574
 make_images                       4110 14.4    0.399    0.520    4.091    4.458
 ot_diis_step                        99 11.5    0.011    0.011    4.361    4.361
 multiply_cannon_sync_h2d         16440 15.4    3.263    3.907    3.263    3.907
 qs_ot_p2m_diag                      48 11.0    0.041    0.044    3.784    3.788
 pw_transfer                       1331 11.6    0.065    0.073    3.588    3.598
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.481    3.494
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.416    3.416
 grid_integrate_task_list           110 12.3    3.194    3.395    3.194    3.395
 mp_sum_l                          6594 12.7    2.235    3.276    2.235    3.276
 density_rs2pw                      110  9.6    0.004    0.005    3.012    3.195
 fft_wrap_pw1pw2_140                451 13.1    0.579    0.588    3.000    3.015
 wfi_extrapolate                     11  7.9    0.001    0.001    2.965    2.965
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.886    2.897
 cp_fm_diag_elpa_base                48 14.0    2.821    2.850    2.884    2.896
 calculate_dm_sparse                110  9.5    0.001    0.001    2.855    2.888
 make_images_data                  4110 15.4    0.046    0.050    2.351    2.824
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.753    2.754
 hybrid_alltoall_any               4261 16.3    0.109    0.388    2.025    2.746
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.714    2.715
 cp_fm_cholesky_invert               11 10.9    2.662    2.668    2.662    2.668
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.471    2.527
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.424    2.466
 fft3d_ps                          1111 14.6    1.089    1.097    2.424    2.435
 grid_collocate_task_list           110  9.6    2.223    2.415    2.223    2.415
 multiply_cannon_metrocomm4       14385 15.4    0.048    0.053    0.857    2.324
 mp_irecv_dv                      48980 15.7    0.782    2.188    0.782    2.188
 potential_pw2rs                    110 12.3    0.011    0.012    2.101    2.108
 mp_alltoall_d11v                  2046 13.8    1.753    2.049    1.753    2.049
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.969    1.970
 dbcsr_complete_redistribute        325 12.2    0.331    0.369    1.457    1.940
 cp_fm_upper_to_full                 70 14.2    1.420    1.805    1.420    1.805
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.791    1.802
 acc_transpose_blocks             16440 15.4    0.077    0.078    1.574    1.672
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.647    1.661
 cp_fm_cholesky_decompose            22 10.9    1.588    1.613    1.588    1.613
 mp_allgather_i34                  2055 14.4    0.509    1.529    0.509    1.529
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.371    1.505
 transfer_rs2pw                     451 10.6    0.005    0.006    1.308    1.487
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    0.985    1.445
 multiply_cannon_metrocomm1       16440 15.4    0.029    0.030    0.371    1.379
 mp_waitany                       17072 13.8    1.143    1.316    1.143    1.316
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.294    1.304
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=61.762000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=632.545455, yerr=10.174510
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             735.055872E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66419.
 MP_Allreduce         9774                    603.
 MP_Sync                52
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.027   68.797   68.798
 qs_mol_dyn_low                       1  2.0    0.003    0.003   68.578   68.590
 qs_forces                           11  3.9    0.003    0.003   67.974   67.975
 qs_energies                         11  4.9    0.002    0.002   64.571   64.575
 scf_env_do_scf                      11  5.9    0.000    0.001   55.284   55.287
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   43.329   43.329
 velocity_verlet                     10  3.0    0.001    0.002   38.558   38.582
 dbcsr_multiply_generic            2055 12.4    0.121    0.128   31.920   32.167
 qs_scf_new_mos                      99  7.5    0.001    0.001   28.486   28.607
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   28.486   28.607
 ot_scf_mini                         99  9.5    0.002    0.003   26.762   26.849
 multiply_cannon                   2055 13.4    0.302    0.355   24.058   25.708
 multiply_cannon_loop              2055 14.4    1.413    1.477   22.466   23.171
 ot_mini                             99 10.5    0.001    0.001   15.174   15.294
 multiply_cannon_multrec          24660 15.4    4.067    6.854   13.281   14.243
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.058   12.162
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   12.057   12.162
 init_scf_loop                       11  6.9    0.000    0.000   11.906   11.906
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.924   11.021
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.657   10.751
 prepare_preconditioner              11  7.9    0.000    0.000   10.106   10.119
 make_preconditioner                 11  8.9    0.000    0.000   10.106   10.119
 dbcsr_mm_accdrv_process          52304 16.0    7.919    9.298    9.055    9.967
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.345    9.782
 mp_waitall_1                    121746 16.5    4.781    6.802    4.781    6.802
 qs_ot_get_p                        110 10.4    0.001    0.001    6.496    6.622
 init_scf_run                        11  5.9    0.000    0.001    6.493    6.493
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    6.492    6.493
 make_m2s                          4110 13.4    0.060    0.062    6.096    6.427
 sum_up_and_integrate               110 10.3    0.001    0.002    6.343    6.356
 integrate_v_rspace                 110 11.3    0.003    0.003    6.317    6.328
 make_images                       4110 14.4    0.580    0.698    5.954    6.280
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.979    5.990
 calculate_rho_elec                 110  8.6    0.078    0.081    5.978    5.989
 cp_fm_upper_to_full                 70 14.2    3.358    4.851    3.358    4.851
 qs_ot_p2m_diag                      48 11.0    0.055    0.064    4.458    4.477
 ot_diis_step                        99 11.5    0.011    0.012    4.207    4.207
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.104    4.192
 apply_single                       110 13.6    0.000    0.000    4.104    4.192
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.875    3.875
 dbcsr_complete_redistribute        325 12.2    0.417    0.462    2.697    3.822
 pw_transfer                       1331 11.6    0.066    0.075    3.763    3.803
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.656    3.699
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.588    3.625
 make_images_data                  4110 15.4    0.049    0.053    3.159    3.602
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.489    3.544
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.513    3.515
 hybrid_alltoall_any               4261 16.3    0.123    0.467    2.606    3.513
 grid_integrate_task_list           110 12.3    3.289    3.408    3.289    3.408
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.327    3.329
 calculate_dm_sparse                110  9.5    0.001    0.001    3.285    3.327
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.269    3.279
 cp_fm_diag_elpa_base                48 14.0    3.119    3.181    3.267    3.277
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    2.142    3.251
 density_rs2pw                      110  9.6    0.004    0.004    3.076    3.214
 multiply_cannon_metrocomm3       24660 15.4    0.038    0.040    1.456    3.205
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.126    3.166
 fft_wrap_pw1pw2_140                451 13.1    0.610    0.631    3.106    3.151
 wfi_extrapolate                     11  7.9    0.001    0.001    3.061    3.061
 cp_fm_cholesky_invert               11 10.9    2.877    2.886    2.877    2.886
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.752    2.835
 mp_alltoall_i22                    605 13.7    1.660    2.834    1.660    2.834
 acc_transpose_blocks             24660 15.4    0.113    0.115    2.425    2.573
 fft3d_ps                          1111 14.6    1.086    1.116    2.538    2.554
 multiply_cannon_sync_h2d         24660 15.4    2.377    2.516    2.377    2.516
 grid_collocate_task_list           110  9.6    2.269    2.420    2.269    2.420
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    2.282    2.282
 potential_pw2rs                    110 12.3    0.013    0.013    2.151    2.158
 mp_allgather_i34                  2055 14.4    0.609    2.133    0.609    2.133
 mp_alltoall_d11v                  2046 13.8    1.846    2.061    1.846    2.061
 jit_kernel_multiply                  9 15.9    0.790    1.890    0.790    1.890
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.835    1.868
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.808    1.824
 mp_sum_l                          6594 12.7    1.069    1.816    1.069    1.816
 cp_fm_cholesky_decompose            22 10.9    1.725    1.781    1.725    1.781
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.611    1.708
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.643    1.654
 acc_transpose_blocks_sync        73980 16.4    1.392    1.514    1.392    1.514
 multiply_cannon_metrocomm4       20550 15.4    0.062    0.067    0.859    1.465
 transfer_rs2pw                     451 10.6    0.005    0.006    1.259    1.442
 mp_irecv_dv                      62702 16.1    0.754    1.378    0.754    1.378
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=68.798000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=696.000000, yerr=8.495988
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             855.982080E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66417.
 MP_Allreduce         9774                    644.
 MP_Sync                52
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.039    0.423   58.279   58.647
 qs_mol_dyn_low                       1  2.0    0.003    0.003   57.532   57.546
 qs_forces                           11  3.9    0.003    0.003   57.362   57.363
 qs_energies                         11  4.9    0.004    0.005   53.705   53.710
 scf_env_do_scf                      11  5.9    0.000    0.001   44.596   44.596
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   36.664   36.665
 velocity_verlet                     10  3.0    0.057    0.071   31.726   31.732
 dbcsr_multiply_generic            2055 12.4    0.105    0.110   24.469   24.587
 qs_scf_new_mos                      99  7.5    0.001    0.001   22.063   22.120
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   22.062   22.119
 ot_scf_mini                         99  9.5    0.002    0.002   20.804   20.855
 multiply_cannon                   2055 13.4    0.240    0.251   18.620   19.901
 multiply_cannon_loop              2055 14.4    0.599    0.615   17.349   17.540
 rebuild_ks_matrix                  110  8.3    0.000    0.000   11.650   11.690
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.014   11.649   11.690
 ot_mini                             99 10.5    0.001    0.001   11.586   11.636
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.364   10.402
 multiply_cannon_multrec           8220 15.4    3.426    4.818    8.342    9.471
 mp_waitall_1                    103326 16.6    6.168    8.044    6.168    8.044
 init_scf_loop                       11  6.9    0.000    0.000    7.884    7.886
 qs_ot_get_derivative                99 11.5    0.001    0.001    7.779    7.830
 sum_up_and_integrate               110 10.3    0.001    0.002    6.248    6.259
 prepare_preconditioner              11  7.9    0.000    0.000    6.238    6.245
 make_preconditioner                 11  8.9    0.000    0.000    6.238    6.245
 integrate_v_rspace                 110 11.3    0.003    0.003    6.221    6.232
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.992    6.005
 calculate_rho_elec                 110  8.6    0.114    0.114    5.992    6.005
 dbcsr_mm_accdrv_process          17442 15.9    3.561    4.954    4.776    5.991
 init_scf_run                        11  5.9    0.000    0.001    5.955    5.955
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    5.955    5.955
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.826    5.897
 qs_ot_get_p                        110 10.4    0.001    0.001    5.202    5.264
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.020    3.188    4.631
 make_m2s                          4110 13.4    0.039    0.040    4.340    4.573
 make_images                       4110 14.4    0.652    0.708    4.209    4.440
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.910    3.985
 apply_single                       110 13.6    0.000    0.000    3.910    3.985
 pw_transfer                       1331 11.6    0.066    0.071    3.901    3.913
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.793    3.809
 ot_diis_step                        99 11.5    0.012    0.012    3.782    3.782
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.768    3.773
 grid_integrate_task_list           110 12.3    3.384    3.552    3.384    3.552
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.415    3.416
 fft_wrap_pw1pw2_140                451 13.1    0.777    0.787    3.254    3.268
 density_rs2pw                      110  9.6    0.004    0.004    3.000    3.162
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.104    3.105
 cp_fm_cholesky_invert               11 10.9    3.046    3.050    3.046    3.050
 calculate_dm_sparse                110  9.5    0.001    0.001    2.807    2.852
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.812    2.820
 cp_fm_diag_elpa_base                48 14.0    2.755    2.777    2.810    2.818
 wfi_extrapolate                     11  7.9    0.001    0.001    2.753    2.753
 hybrid_alltoall_any               4261 16.3    0.201    0.855    2.295    2.692
 qs_energies_init_hamiltonians       11  5.9    0.004    0.009    2.680    2.681
 make_images_data                  4110 15.4    0.041    0.047    2.309    2.642
 grid_collocate_task_list           110  9.6    2.372    2.587    2.372    2.587
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.557    2.558
 multiply_cannon_sync_h2d          8220 15.4    2.353    2.490    2.353    2.490
 fft3d_ps                          1111 14.6    1.136    1.164    2.459    2.473
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.206    2.319
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.162    2.184
 jit_kernel_multiply                 10 15.4    0.902    2.143    0.902    2.143
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.011    2.039
 potential_pw2rs                    110 12.3    0.016    0.016    2.017    2.021
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.784    1.991
 mp_alltoall_d11v                  2046 13.8    1.750    1.883    1.750    1.883
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.757    1.770
 cp_fm_cholesky_decompose            22 10.9    1.751    1.766    1.751    1.766
 qs_env_update_s_mstruct             11  6.9    0.024    0.034    1.519    1.627
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.617    1.622
 dbcsr_complete_redistribute        325 12.2    0.557    0.588    1.498    1.596
 mp_allgather_i34                  2055 14.4    0.434    1.534    0.434    1.534
 acc_transpose_blocks              8220 15.4    0.039    0.040    1.358    1.383
 transfer_rs2pw                     451 10.6    0.005    0.005    1.164    1.357
 multiply_cannon_metrocomm4        6165 15.4    0.019    0.020    0.477    1.323
 qs_create_task_list                 11  7.9    0.004    0.005    1.212    1.312
 generate_qs_task_list               11  8.9    0.373    0.440    1.208    1.308
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.240    1.280
 mp_irecv_dv                      24056 15.7    0.451    1.279    0.451    1.279
 mp_waitany                        9240 13.8    1.049    1.227    1.049    1.227
 make_basis_sm                       11  9.8    0.014    0.021    1.183    1.186
 multiply_cannon_metrocomm1        8220 15.4    0.022    0.023    0.818    1.183
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=58.647000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=804.727273, yerr=11.732557
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.356882E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67098.
 MP_Allreduce         9752                    812.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.049   87.088   87.097
 qs_mol_dyn_low                       1  2.0    0.003    0.004   86.782   86.795
 qs_forces                           11  3.9    0.024    0.027   86.669   86.670
 qs_energies                         11  4.9    0.002    0.002   82.518   82.520
 scf_env_do_scf                      11  5.9    0.001    0.001   72.095   72.095
 velocity_verlet                     10  3.0    0.002    0.002   55.482   55.490
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.008   43.889   43.890
 dbcsr_multiply_generic            2055 12.4    0.119    0.125   29.854   29.941
 init_scf_loop                       11  6.9    0.000    0.000   28.134   28.136
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.717   26.750
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.717   26.750
 prepare_preconditioner              11  7.9    0.000    0.000   26.127   26.134
 make_preconditioner                 11  8.9    0.000    0.000   26.127   26.134
 make_full_inverse_cholesky          11  9.9    0.000    0.000   20.225   25.614
 ot_scf_mini                         99  9.5    0.002    0.002   24.918   24.944
 multiply_cannon                   2055 13.4    0.336    0.363   22.763   23.505
 multiply_cannon_loop              2055 14.4    0.820    0.835   20.978   21.279
 cp_fm_upper_to_full                 70 14.2   12.725   18.362   12.725   18.362
 ot_mini                             99 10.5    0.001    0.001   14.087   14.114
 rebuild_ks_matrix                  110  8.3    0.000    0.001   13.514   13.547
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.013   13.514   13.546
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.274   12.303
 dbcsr_complete_redistribute        325 12.2    1.020    1.043    7.445   10.691
 multiply_cannon_multrec           8220 15.4    4.075    4.253    9.855    9.957
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.448    9.699
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.480    9.508
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    5.886    9.114
 mp_alltoall_i22                    605 13.7    5.513    8.743    5.513    8.743
 mp_waitall_1                     84994 16.7    7.396    8.195    7.396    8.195
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.047    7.083
 calculate_rho_elec                 110  8.6    0.225    0.225    7.047    7.082
 sum_up_and_integrate               110 10.3    0.002    0.002    6.682    6.697
 integrate_v_rspace                 110 11.3    0.004    0.004    6.654    6.669
 init_scf_run                        11  5.9    0.000    0.001    6.090    6.090
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    6.090    6.090
 make_m2s                          4110 13.4    0.044    0.044    5.349    5.840
 dbcsr_mm_accdrv_process          11614 15.7    3.924    4.301    5.631    5.824
 make_images                       4110 14.4    0.890    0.950    5.159    5.650
 qs_ot_get_p                        110 10.4    0.001    0.001    5.493    5.526
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.740    5.214
 apply_single                       110 13.6    0.000    0.000    4.739    5.213
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.019    4.773    5.090
 cp_fm_cholesky_invert               11 10.9    5.047    5.051    5.047    5.051
 pw_transfer                       1331 11.6    0.074    0.075    4.879    4.889
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.010    4.762    4.772
 ot_diis_step                        99 11.5    0.015    0.015    4.590    4.592
 fft_wrap_pw1pw2_140                451 13.1    1.279    1.285    4.169    4.175
 qs_ot_p2m_diag                      48 11.0    0.151    0.155    3.955    3.962
 grid_integrate_task_list           110 12.3    3.675    3.740    3.675    3.740
 qs_energies_init_hamiltonians       11  5.9    0.017    0.027    3.724    3.725
 calculate_dm_sparse                110  9.5    0.001    0.001    3.523    3.547
 hybrid_alltoall_any               4261 16.3    0.265    0.579    2.803    3.536
 density_rs2pw                      110  9.6    0.004    0.004    3.521    3.530
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.014    3.491
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.440    3.440
 make_images_data                  4110 15.4    0.046    0.049    2.782    3.402
 wfi_extrapolate                     11  7.9    0.001    0.001    3.328    3.329
 multiply_cannon_sync_h2d          8220 15.4    3.131    3.151    3.131    3.151
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.928    2.931
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.841    2.842
 cp_fm_diag_elpa_base                48 14.0    2.305    2.503    2.840    2.840
 fft3d_ps                          1111 14.6    1.296    1.305    2.815    2.821
 grid_collocate_task_list           110  9.6    2.666    2.681    2.666    2.681
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.647    2.647
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.632    2.647
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.411    2.422
 qs_env_update_s_mstruct             11  6.9    0.001    0.002    2.225    2.283
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.118    2.246
 potential_pw2rs                    110 12.3    0.022    0.022    2.217    2.219
 acc_transpose_blocks              8220 15.4    0.040    0.040    2.025    2.052
 mp_alltoall_d11v                  2046 13.8    1.996    2.046    1.996    2.046
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    2.021    2.035
 cp_fm_cholesky_decompose            22 10.9    1.958    1.975    1.958    1.975
 qs_create_task_list                 11  7.9    0.017    0.017    1.896    1.943
 generate_qs_task_list               11  8.9    0.731    0.783    1.879    1.925
 jit_kernel_multiply                 10 15.2    1.511    1.848    1.511    1.848
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.840    1.843
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.727    1.763
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=87.097000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1231.090909, yerr=50.060789
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             633.208832E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175955383376
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4002                  57761.
 MP_Allreduce        11084                    796.
 MP_Sync                87
 MP_Alltoall          2226                4093030.
 MP_SendRecv         24320                  18752.
 MP_ISendRecv        24320                  18752.
 MP_Wait             42476
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.025  209.355  209.357
 qs_mol_dyn_low                       1  2.0    0.003    0.003  208.883  208.934
 qs_forces                           11  3.9    0.044    0.051  208.397  208.398
 qs_energies                         11  4.9    0.002    0.003  202.735  202.790
 scf_env_do_scf                      11  5.9    0.001    0.001  185.498  185.502
 scf_env_do_scf_inner_loop          117  6.6    0.006    0.033  164.466  164.468
 dbcsr_multiply_generic            2507 12.6    0.197    0.225  126.539  127.111
 qs_scf_new_mos                     117  7.6    0.001    0.001  125.082  125.320
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  125.082  125.319
 velocity_verlet                     10  3.0    0.004    0.022  124.980  124.985
 ot_scf_mini                        117  9.6    0.003    0.003  118.363  118.558
 multiply_cannon                   2507 13.6    0.242    0.250  102.166  104.043
 multiply_cannon_loop              2507 14.6    2.393    2.455   99.917  101.873
 ot_mini                            117 10.6    0.001    0.001   66.718   66.946
 multiply_cannon_multrec          60168 15.6   31.840   33.943   41.953   44.236
 qs_ot_get_derivative               117 11.6    0.001    0.001   41.915   42.130
 rebuild_ks_matrix                  128  8.3    0.001    0.001   33.524   33.896
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.017   33.523   33.896
 mp_waitall_1                    267128 16.5   29.005   32.668   29.005   32.668
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.079   30.420
 qs_ot_get_p                        128 10.4    0.001    0.001   29.614   29.918
 multiply_cannon_sync_h2d         60168 15.6   26.270   28.174   26.270   28.174
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.358   25.242
 apply_single                       128 13.6    0.001    0.001   24.358   25.242
 ot_diis_step                       117 11.6    0.008    0.008   24.552   24.553
 qs_ot_p2m_diag                      83 11.4    0.079    0.091   22.576   22.655
 init_scf_loop                       11  6.9    0.000    0.001   20.957   20.958
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   19.707   19.875
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   19.806   19.806
 multiply_cannon_metrocomm3       60168 15.6    0.116    0.121   16.170   18.740
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   16.829   16.870
 cp_fm_diag_elpa_base                83 14.4   16.761   16.809   16.825   16.867
 prepare_preconditioner              11  7.9    0.000    0.000   15.949   15.972
 make_preconditioner                 11  8.9    0.000    0.000   15.948   15.972
 make_full_inverse_cholesky          11  9.9    0.000    0.000   15.182   15.349
 make_m2s                          5014 13.6    0.105    0.117   13.823   14.158
 make_images                       5014 14.6    0.398    0.416   13.639   13.988
 sum_up_and_integrate               128 10.3    0.002    0.004   13.958   13.974
 integrate_v_rspace                 128 11.3    0.003    0.004   13.899   13.917
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.179   13.305
 calculate_rho_elec                 128  8.7    0.045    0.064   13.178   13.304
 init_scf_run                        11  5.9    0.000    0.001   13.011   13.012
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   13.011   13.012
 mp_sum_l                          7950 12.9    9.266   10.674    9.266   10.674
 dbcsr_mm_accdrv_process         124484 16.2    4.726    4.884    9.680   10.136
 cp_fm_cholesky_invert               11 10.9    9.182    9.190    9.182    9.190
 wfi_extrapolate                     11  7.9    0.001    0.001    9.070    9.070
 calculate_dm_sparse                128  9.5    0.001    0.001    8.635    8.723
 multiply_cannon_metrocomm1       60168 15.6    0.093    0.097    6.402    8.671
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    8.450    8.590
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    8.268    8.397
 pw_transfer                       1547 11.6    0.074    0.085    7.661    7.839
 make_images_data                  5014 15.6    0.067    0.073    6.741    7.739
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    7.458    7.633
 density_rs2pw                      128  9.7    0.006    0.007    6.789    7.475
 grid_integrate_task_list           128 12.3    7.056    7.381    7.056    7.381
 hybrid_alltoall_any               5200 16.5    0.297    2.276    5.891    7.341
 fft_wrap_pw1pw2_140                523 13.2    1.127    1.173    6.577    6.744
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.711    6.722
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.829    5.951
 fft3d_ps                          1291 14.7    2.193    2.847    5.464    5.794
 mp_alltoall_d11v                  2415 14.1    4.377    5.426    4.377    5.426
 grid_collocate_task_list           128  9.7    4.846    5.259    4.846    5.259
 cp_fm_cholesky_decompose            22 10.9    4.586    4.599    4.586    4.599
 potential_pw2rs                    128 12.3    0.009    0.010    4.452    4.468
 mp_sum_d                          4470 12.1    3.773    4.455    3.773    4.455
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=209.357000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=596.818182, yerr=6.042549
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430460020736       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1958505086976       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986244964352       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992000282624       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753956716544       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613089636352       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239146475520       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239146475520       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911124992000       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.228663E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.199914E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806316384       0.0%      0.0%    100.0%
 number of processed stacks               6022464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.2
 marketing flops                   145.647559E+12
 -------------------------------------------------------------------------------
 # multiplications                           2527
 max memory usage/rank             845.664256E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2425920
 MPI messages size (bytes):
  total size                         4.132350E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.703416E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               71436               2336489472
     32768 < size <=   131072              728832              55956209664
    131072 < size <=  4194304             1386864            1409906900992
   4194304 < size <= 16777216              155760            1473826772352
  16777216 < size                           68112            1190343475200
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4024                  57903.
 MP_Allreduce        11138                    958.
 MP_Sync                87
 MP_Alltoall          1983                4870156.
 MP_SendRecv         12126                  47072.
 MP_ISendRecv        12126                  47072.
 MP_Wait             26114
 MP_ISend            11836                 212447.
 MP_IRecv            11836                 212447.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.041    0.096  191.228  191.229
 qs_mol_dyn_low                       1  2.0    0.003    0.004  190.356  190.370
 qs_forces                           11  3.9    0.004    0.005  190.257  190.258
 qs_energies                         11  4.9    0.003    0.016  183.538  183.553
 scf_env_do_scf                      11  5.9    0.002    0.012  166.925  166.935
 scf_env_do_scf_inner_loop          118  6.6    0.003    0.008  133.997  134.000
 velocity_verlet                     10  3.0    0.008    0.016  120.316  120.318
 dbcsr_multiply_generic            2527 12.6    0.192    0.198   98.001   99.223
 qs_scf_new_mos                     118  7.6    0.001    0.001   95.165   95.789
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   95.165   95.788
 ot_scf_mini                        118  9.6    0.004    0.004   90.296   91.023
 multiply_cannon                   2527 13.6    0.508    0.563   77.541   81.948
 multiply_cannon_loop              2527 14.6    1.591    1.661   74.613   77.563
 ot_mini                            118 10.6    0.001    0.001   50.669   51.329
 mp_waitall_1                    216438 16.6   24.363   38.486   24.363   38.486
 multiply_cannon_multrec          30324 15.6   21.064   25.833   31.739   36.894
 rebuild_ks_matrix                  129  8.3    0.001    0.001   32.395   33.223
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.019   32.394   33.222
 init_scf_loop                       11  6.9    0.001    0.003   32.827   32.829
 qs_ks_update_qs_env                129  7.6    0.001    0.001   29.162   29.925
 qs_ot_get_derivative               118 11.6    0.001    0.002   28.385   29.098
 multiply_cannon_metrocomm3       30324 15.6    0.097    0.103   15.931   28.876
 prepare_preconditioner              11  7.9    0.000    0.000   28.511   28.572
 make_preconditioner                 11  8.9    0.000    0.001   28.511   28.572
 make_full_inverse_cholesky          11  9.9    0.000    0.000   27.218   27.754
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   22.291   23.411
 apply_single                       129 13.6    0.001    0.001   22.291   23.411
 qs_ot_get_p                        129 10.4    0.001    0.001   21.579   22.345
 ot_diis_step                       118 11.6    0.014    0.015   22.082   22.084
 multiply_cannon_sync_h2d         30324 15.6   18.068   21.145   18.068   21.145
 qs_ot_p2m_diag                      83 11.4    0.188    0.216   16.615   16.661
 cp_fm_cholesky_invert               11 10.9   16.628   16.641   16.628   16.641
 make_m2s                          5054 13.6    0.090    0.095   14.322   15.540
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   15.376   15.377
 make_images                       5054 14.6    1.185    1.393   14.109   15.327
 sum_up_and_integrate               129 10.3    0.002    0.004   14.083   14.114
 integrate_v_rspace                 129 11.3    0.003    0.004   14.022   14.057
 qs_rho_update_rho_low              129  7.7    0.001    0.001   13.469   13.501
 calculate_rho_elec                 129  8.7    0.088    0.104   13.469   13.500
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   12.199   12.238
 cp_fm_diag_elpa_base                83 14.4   11.946   12.020   12.193   12.228
 init_scf_run                        11  5.9    0.000    0.001   11.816   11.817
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   11.815   11.817
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   11.169   11.672
 multiply_cannon_metrocomm4       27797 15.6    0.105    0.119    3.941   11.041
 dbcsr_mm_accdrv_process          62734 16.2    5.487    6.350   10.120   10.714
 mp_irecv_dv                      70031 16.3    3.735   10.626    3.735   10.626
 make_images_data                  5054 15.6    0.068    0.079    8.402    9.918
 hybrid_alltoall_any               5240 16.5    0.356    1.549    7.133    9.033
 pw_transfer                       1559 11.6    0.085    0.094    8.644    8.703
 fft_wrap_pw1pw2                   1301 12.7    0.010    0.011    8.419    8.483
 wfi_extrapolate                     11  7.9    0.001    0.001    8.353    8.353
 density_rs2pw                      129  9.7    0.006    0.007    7.111    7.763
 fft_wrap_pw1pw2_140                527 13.2    1.225    1.242    7.456    7.540
 grid_integrate_task_list           129 12.3    7.238    7.532    7.238    7.532
 qs_ot_get_derivative_taylor         41 13.0    0.001    0.001    6.424    7.224
 cp_fm_cholesky_decompose            22 10.9    6.905    6.977    6.905    6.977
 calculate_dm_sparse                129  9.5    0.001    0.001    6.700    6.875
 mp_sum_l                          8010 12.9    4.359    6.630    4.359    6.630
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.080    6.090
 fft3d_ps                          1301 14.7    2.859    3.033    5.960    6.012
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    5.471    5.560
 grid_collocate_task_list           129  9.7    5.049    5.492    5.049    5.492
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.152    5.354
 mp_alltoall_d11v                  2423 14.1    4.219    5.257    4.219    5.257
 mp_allgather_i34                  2527 14.6    1.431    4.622    1.431    4.622
 potential_pw2rs                    129 12.3    0.016    0.019    4.507    4.524
 mp_sum_d                          4499 12.1    2.733    4.197    2.733    4.197
 dbcsr_complete_redistribute        395 12.7    0.772    0.876    3.158    4.001
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=191.229000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=802.727273, yerr=5.561081
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410024443904       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444712984576       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796586E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.906046E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705502176       0.0%      0.0%    100.0%
 number of processed stacks               3951168       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1697.1
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank             939.151360E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1033760
 MPI messages size (bytes):
  total size                         2.695213E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.607194E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              279168              36591108096
    131072 < size <=  4194304              654272             987691483136
   4194304 < size <= 16777216               65184             925173050704
  16777216 < size                           28448             745747251200
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4003                  58192.
 MP_Allreduce        11085                   1000.
 MP_Sync                86
 MP_Alltoall          1700                9383497.
 MP_SendRecv          7874                  75008.
 MP_ISendRecv         7874                  75008.
 MP_Wait             21654
 MP_ISend            11660                 275234.
 MP_IRecv            11660                 275234.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.038    0.058  175.744  175.744
 qs_mol_dyn_low                       1  2.0    0.003    0.003  174.805  174.820
 qs_forces                           11  3.9    0.004    0.004  174.688  174.691
 qs_energies                         11  4.9    0.002    0.002  168.158  168.169
 scf_env_do_scf                      11  5.9    0.001    0.001  152.423  152.425
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  117.322  117.323
 velocity_verlet                     10  3.0    0.013    0.022  111.768  111.771
 dbcsr_multiply_generic            2485 12.5    0.179    0.184   81.692   82.785
 qs_scf_new_mos                     116  7.6    0.001    0.001   80.548   80.854
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   80.547   80.853
 ot_scf_mini                        116  9.6    0.003    0.004   76.289   76.644
 multiply_cannon                   2485 13.5    0.507    0.531   61.941   66.789
 multiply_cannon_loop              2485 14.5    1.125    1.181   58.841   61.492
 ot_mini                            116 10.6    0.001    0.001   42.354   42.694
 init_scf_loop                       11  6.9    0.000    0.000   35.001   35.002
 mp_waitall_1                    169034 16.6   24.478   33.409   24.478   33.409
 prepare_preconditioner              11  7.9    0.000    0.000   31.005   31.062
 make_preconditioner                 11  8.9    0.000    0.000   31.005   31.062
 rebuild_ks_matrix                  127  8.3    0.001    0.001   30.005   30.492
 qs_ks_build_kohn_sham_matrix       127  9.3    0.016    0.017   30.004   30.492
 make_full_inverse_cholesky          11  9.9    0.000    0.000   28.657   30.067
 qs_ks_update_qs_env                127  7.6    0.001    0.001   27.033   27.477
 multiply_cannon_multrec          19880 15.5   13.030   15.972   22.350   25.293
 multiply_cannon_metrocomm3       19880 15.5    0.062    0.064   15.132   24.050
 qs_ot_get_derivative               116 11.6    0.002    0.002   22.969   23.317
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   19.618   20.653
 apply_single                       127 13.6    0.001    0.001   19.618   20.653
 qs_ot_get_p                        127 10.4    0.001    0.001   19.335   19.736
 ot_diis_step                       116 11.6    0.018    0.018   19.278   19.279
 make_m2s                          4970 13.5    0.079    0.083   14.644   16.016
 make_images                       4970 14.5    1.147    1.239   14.412   15.773
 qs_ot_p2m_diag                      82 11.4    0.261    0.268   15.049   15.057
 multiply_cannon_sync_h2d         19880 15.5   13.497   15.006   13.497   15.006
 cp_fm_cholesky_invert               11 10.9   14.417   14.426   14.417   14.426
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   13.991   13.992
 sum_up_and_integrate               127 10.3    0.002    0.003   13.805   13.831
 integrate_v_rspace                 127 11.3    0.003    0.004   13.746   13.775
 qs_rho_update_rho_low              127  7.7    0.001    0.001   13.434   13.473
 calculate_rho_elec                 127  8.7    0.129    0.143   13.433   13.472
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   10.811   10.829
 cp_fm_diag_elpa_base                82 14.4   10.407   10.544   10.807   10.825
 init_scf_run                        11  5.9    0.000    0.001   10.723   10.724
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   10.723   10.723
 make_images_data                  4970 15.5    0.062    0.072    8.748   10.582
 hybrid_alltoall_any               5155 16.4    0.445    2.018    7.626    9.912
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    8.948    9.211
 multiply_cannon_metrocomm4       17395 15.5    0.066    0.076    3.458    9.208
 dbcsr_mm_accdrv_process          41158 16.2    5.638    5.906    8.787    8.985
 mp_irecv_dv                      49801 16.2    3.331    8.957    3.331    8.957
 pw_transfer                       1535 11.6    0.084    0.100    8.663    8.765
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    8.441    8.552
 grid_integrate_task_list           127 12.3    7.213    7.746    7.213    7.746
 fft_wrap_pw1pw2_140                519 13.2    1.292    1.321    7.486    7.608
 wfi_extrapolate                     11  7.9    0.001    0.001    7.472    7.472
 cp_fm_cholesky_decompose            22 10.9    7.341    7.358    7.341    7.358
 cp_fm_upper_to_full                104 14.8    5.830    7.342    5.830    7.342
 density_rs2pw                      127  9.7    0.006    0.006    6.828    7.069
 dbcsr_complete_redistribute        393 12.7    1.167    1.197    4.573    6.358
 calculate_dm_sparse                127  9.5    0.001    0.001    5.915    6.021
 fft3d_ps                          1281 14.7    2.736    2.964    5.839    5.921
 grid_collocate_task_list           127  9.7    5.117    5.619    5.117    5.619
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.457    5.463
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.660    5.425
 copy_fm_to_dbcsr                   208 11.6    0.001    0.002    3.412    5.195
 mp_alltoall_d11v                  2401 14.1    4.281    4.861    4.281    4.861
 mp_allgather_i34                  2485 14.5    1.598    4.831    1.598    4.831
 mp_sum_l                          7884 12.9    3.289    4.825    3.289    4.825
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.545    4.681
 potential_pw2rs                    127 12.3    0.021    0.023    4.321    4.333
 transfer_fm_to_dbcsr                11  9.9    0.020    0.025    2.328    4.107
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    3.882    3.920
 mp_alltoall_i22                    712 14.1    1.957    3.909    1.957    3.909
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.724    3.725
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=175.744000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=887.727273, yerr=7.411182
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430457200640       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1962800054272       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986252263424       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992003932160       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613077360640       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239167967232       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239167967232       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.233007E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.387242E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806383584       0.0%      0.0%    100.0%
 number of processed stacks               6026880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1129.3
 marketing flops                   145.651870E+12
 -------------------------------------------------------------------------------
 # multiplications                           2529
 max memory usage/rank               1.157599E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1153224
 MPI messages size (bytes):
  total size                         2.039489E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.768511E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              322096              36390305792
    131072 < size <=  4194304              721976             792118951936
   4194304 < size <= 16777216               70800             669922014800
  16777216 < size                           30960             541065216000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4043                  57626.
 MP_Allreduce        11184                   1079.
 MP_Sync                88
 MP_Alltoall          1724               12509627.
 MP_SendRecv          5934                  75008.
 MP_ISendRecv         5934                  75008.
 MP_Wait             22612
 MP_ISend            15064                 244788.
 MP_IRecv            15064                 244788.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.027    0.057  189.256  189.271
 qs_mol_dyn_low                       1  2.0    0.003    0.004  188.683  188.903
 qs_forces                           11  3.9    0.004    0.005  188.218  188.225
 qs_energies                         11  4.9    0.149    0.151  181.129  181.139
 scf_env_do_scf                      11  5.9    0.001    0.001  163.666  163.678
 velocity_verlet                     10  3.0    0.002    0.004  125.586  125.603
 scf_env_do_scf_inner_loop          118  6.6    0.026    0.093  117.307  117.308
 dbcsr_multiply_generic            2529 12.6    0.189    0.194   81.501   82.211
 qs_scf_new_mos                     118  7.6    0.001    0.001   80.846   81.192
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   80.846   81.191
 ot_scf_mini                        118  9.6    0.003    0.004   76.245   76.623
 multiply_cannon                   2529 13.6    0.557    0.583   56.392   60.146
 multiply_cannon_loop              2529 14.6    1.850    1.942   52.735   54.730
 init_scf_loop                       11  6.9    0.000    0.000   46.238   46.239
 ot_mini                            118 10.6    0.001    0.001   43.332   43.704
 prepare_preconditioner              11  7.9    0.000    0.000   41.968   41.989
 make_preconditioner                 11  8.9    0.000    0.000   41.968   41.989
 make_full_inverse_cholesky          11  9.9    0.013    0.022   35.543   40.541
 multiply_cannon_multrec          30348 15.6   13.649   18.878   26.728   31.697
 rebuild_ks_matrix                  129  8.3    0.001    0.001   29.364   29.739
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.019   29.364   29.739
 mp_waitall_1                    149172 16.7   17.445   27.175   17.445   27.175
 qs_ks_update_qs_env                129  7.6    0.001    0.001   26.516   26.850
 qs_ot_get_derivative               118 11.6    0.002    0.002   23.578   23.954
 make_m2s                          5058 13.6    0.097    0.102   20.567   21.465
 make_images                       5058 14.6    1.982    2.262   20.259   21.155
 apply_preconditioner_dbcsr         129 12.6    0.000    0.001   19.158   19.676
 apply_single                       129 13.6    0.001    0.001   19.157   19.676
 ot_diis_step                       118 11.6    0.018    0.019   19.631   19.633
 qs_ot_get_p                        129 10.4    0.001    0.001   19.068   19.481
 cp_fm_upper_to_full                106 14.8   11.446   16.844   11.446   16.844
 cp_fm_cholesky_invert               11 10.9   15.942   15.951   15.942   15.951
 multiply_cannon_metrocomm3       30348 15.6    0.050    0.053    6.408   14.996
 qs_ot_p2m_diag                      84 11.4    0.346    0.394   14.694   14.748
 sum_up_and_integrate               129 10.3    0.002    0.003   13.976   14.000
 integrate_v_rspace                 129 11.3    0.003    0.004   13.915   13.941
 qs_rho_update_rho_low              129  7.7    0.001    0.001   13.755   13.797
 calculate_rho_elec                 129  8.7    0.174    0.190   13.755   13.796
 cp_dbcsr_syevd                      84 12.4    0.005    0.006   13.267   13.268
 dbcsr_mm_accdrv_process          62780 16.2    8.511    9.366   12.648   13.133
 dbcsr_complete_redistribute        397 12.7    1.511    1.620    9.166   12.917
 make_images_data                  5058 15.6    0.067    0.075   11.058   12.729
 copy_fm_to_dbcsr                   210 11.7    0.001    0.002    7.791   11.515
 hybrid_alltoall_any               5245 16.5    0.534    2.232    9.781   11.422
 multiply_cannon_sync_h2d         30348 15.6   10.483   11.236   10.483   11.236
 init_scf_run                        11  5.9    0.000    0.001   11.009   11.010
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   11.009   11.010
 qs_ot_get_derivative_diag           78 12.4    0.002    0.002    9.935   10.185
 cp_fm_diag_elpa                     84 13.4    0.000    0.000   10.047   10.064
 transfer_fm_to_dbcsr                11  9.9    0.002    0.008    6.402   10.060
 cp_fm_diag_elpa_base                84 14.4    9.092    9.420   10.041   10.056
 mp_alltoall_i22                    720 14.1    5.648    9.389    5.648    9.389
 pw_transfer                       1559 11.6    0.086    0.101    8.954    9.020
 fft_wrap_pw1pw2                   1301 12.7    0.010    0.011    8.726    8.796
 grid_integrate_task_list           129 12.3    7.589    7.954    7.589    7.954
 fft_wrap_pw1pw2_140                527 13.2    1.456    1.484    7.771    7.847
 wfi_extrapolate                     11  7.9    0.001    0.001    7.660    7.660
 cp_fm_cholesky_decompose            22 10.9    7.429    7.531    7.429    7.531
 density_rs2pw                      129  9.7    0.006    0.006    6.734    7.268
 multiply_cannon_metrocomm4       25290 15.6    0.085    0.096    2.886    6.881
 mp_irecv_dv                      76751 16.2    2.728    6.605    2.728    6.605
 calculate_dm_sparse                129  9.5    0.001    0.001    6.491    6.601
 fft3d_ps                          1301 14.7    2.861    2.915    5.886    5.935
 grid_collocate_task_list           129  9.7    5.327    5.693    5.327    5.693
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.541    5.588
 mp_alltoall_d11v                  2429 14.1    5.007    5.337    5.007    5.337
 qs_energies_init_hamiltonians       11  5.9    0.038    0.039    4.795    4.795
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.551    4.689
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.416    4.526
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    4.293    4.359
 potential_pw2rs                    129 12.3    0.023    0.024    4.177    4.190
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=189.271000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1087.181818, yerr=18.324441
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410023282688       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444707676160       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796579E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.820059E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705500928       0.0%      0.0%    100.0%
 number of processed stacks               1944496       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3448.5
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               1.533350E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  238560
 MPI messages size (bytes):
  total size                         1.321104E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.537828E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              112800              59139686400
   4194304 < size <= 16777216              104112             545846722560
  16777216 < size                           20064             716108638608
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8852                     52.
 MP_Alltoall          9584                 804353.
 MP_ISend            39716                2104723.
 MP_IRecv            39716                2103824.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3997                  58273.
 MP_Allreduce        11070                   1167.
 MP_Sync                86
 MP_Alltoall          1700               18828160.
 MP_SendRecv          3810                 122880.
 MP_ISendRecv         3810                 122880.
 MP_Wait             16000
 MP_ISend            10600                 423612.
 MP_IRecv            10600                 423612.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.014    0.032  169.119  169.120
 qs_mol_dyn_low                       1  2.0    0.003    0.004  168.719  168.736
 qs_forces                           11  3.9    0.004    0.005  168.602  168.604
 qs_energies                         11  4.9    0.025    0.061  161.234  161.244
 scf_env_do_scf                      11  5.9    0.001    0.001  143.343  143.357
 velocity_verlet                     10  3.0    0.011    0.017  110.342  110.346
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.009  107.730  107.731
 dbcsr_multiply_generic            2485 12.5    0.180    0.192   71.912   72.239
 qs_scf_new_mos                     116  7.6    0.001    0.001   71.453   71.530
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   71.452   71.529
 ot_scf_mini                        116  9.6    0.003    0.004   67.025   67.093
 multiply_cannon                   2485 13.5    0.558    0.601   53.335   56.675
 multiply_cannon_loop              2485 14.5    0.801    0.837   50.328   50.984
 ot_mini                            116 10.6    0.001    0.001   37.457   37.523
 init_scf_loop                       11  6.9    0.000    0.000   35.461   35.464
 prepare_preconditioner              11  7.9    0.000    0.000   31.606   31.627
 make_preconditioner                 11  8.9    0.000    0.000   31.605   31.627
 mp_waitall_1                    124680 16.7   24.407   30.657   24.407   30.657
 make_full_inverse_cholesky          11  9.9    0.017    0.025   29.504   29.776
 rebuild_ks_matrix                  127  8.3    0.001    0.001   28.731   28.843
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.018   28.731   28.842
 qs_ks_update_qs_env                127  7.6    0.001    0.001   26.173   26.273
 multiply_cannon_multrec           9940 15.5   10.437   14.755   18.338   21.422
 qs_ot_get_derivative               116 11.6    0.001    0.002   20.671   20.741
 multiply_cannon_metrocomm3        9940 15.5    0.025    0.027   12.372   19.625
 cp_fm_cholesky_invert               11 10.9   18.024   18.029   18.024   18.029
 qs_ot_get_p                        127 10.4    0.001    0.001   16.934   17.009
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   16.843   16.943
 apply_single                       127 13.6    0.001    0.001   16.843   16.943
 ot_diis_step                       116 11.6    0.019    0.020   16.717   16.718
 make_m2s                          4970 13.5    0.065    0.070   14.722   15.656
 make_images                       4970 14.5    2.163    2.584   14.417   15.343
 sum_up_and_integrate               127 10.3    0.002    0.002   13.948   13.995
 integrate_v_rspace                 127 11.3    0.004    0.004   13.889   13.937
 qs_rho_update_rho_low              127  7.7    0.001    0.001   13.862   13.893
 calculate_rho_elec                 127  8.7    0.254    0.265   13.861   13.892
 qs_ot_p2m_diag                      82 11.4    0.490    0.499   13.266   13.282
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   12.111   12.112
 multiply_cannon_sync_h2d          9940 15.5   10.803   11.121   10.803   11.121
 init_scf_run                        11  5.9    0.000    0.001   11.010   11.010
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   11.010   11.010
 hybrid_alltoall_any               5155 16.4    0.837    3.764    8.292    9.844
 make_images_data                  4970 15.5    0.056    0.065    8.479    9.754
 cp_fm_diag_elpa                     82 13.4    0.000    0.000    9.275    9.287
 cp_fm_diag_elpa_base                82 14.4    9.037    9.113    9.269    9.280
 pw_transfer                       1535 11.6    0.084    0.093    9.194    9.223
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    8.973    9.009
 dbcsr_mm_accdrv_process          20590 16.1    3.661    4.905    7.552    8.322
 grid_integrate_task_list           127 12.3    7.714    8.121    7.714    8.121
 cp_fm_cholesky_decompose            22 10.9    7.913    8.025    7.913    8.025
 qs_ot_get_derivative_diag           76 12.4    0.002    0.003    7.952    8.006
 fft_wrap_pw1pw2_140                519 13.2    1.791    1.831    7.957    7.996
 wfi_extrapolate                     11  7.9    0.001    0.001    7.471    7.471
 multiply_cannon_metrocomm1        9940 15.5    0.030    0.030    4.322    7.129
 density_rs2pw                      127  9.7    0.005    0.006    6.613    6.937
 calculate_dm_sparse                127  9.5    0.001    0.001    6.205    6.274
 fft3d_ps                          1281 14.7    2.744    2.817    5.726    5.773
 grid_collocate_task_list           127  9.7    5.529    5.766    5.529    5.766
 dbcsr_complete_redistribute        393 12.7    2.112    2.165    5.218    5.623
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.268    5.308
 qs_energies_init_hamiltonians       11  5.9    0.012    0.017    5.239    5.240
 mp_alltoall_d11v                  2401 14.1    4.591    5.178    4.591    5.178
 mp_allgather_i34                  2485 14.5    1.180    4.632    1.180    4.632
 potential_pw2rs                    127 12.3    0.026    0.028    4.186    4.194
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.135    4.192
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.579    3.857
 copy_fm_to_dbcsr                   208 11.6    0.001    0.002    3.393    3.754
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.608    3.685
 multiply_cannon_metrocomm4        7455 15.5    0.026    0.029    1.717    3.683
 mp_irecv_dv                      28618 15.9    1.679    3.614    1.679    3.614
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    3.566    3.592
 copy_dbcsr_to_fm                   185 11.7    0.004    0.004    3.427    3.500
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.460    3.470
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=169.120000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1440.000000, yerr=28.239238
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410020794368       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957875093504       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963540389888       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714619674624       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444697722880       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019185291264       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019185291264       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624869085184       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.815908E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.608822E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705793568       0.0%      0.0%    100.0%
 number of processed stacks               1951120       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3436.9
 marketing flops                   143.527070E+12
 -------------------------------------------------------------------------------
 # multiplications                           2494
 max memory usage/rank               3.196068E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   99760
 MPI messages size (bytes):
  total size                         1.127724E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.304370E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               45128              35047604224
   4194304 < size <= 16777216               43984             376564613120
  16777216 < size                           10032             716108641296
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3956                  59799.
 MP_Allreduce        10967                   1506.
 MP_Sync                87
 MP_Alltoall          1700               36954490.
 MP_SendRecv          1778                 218624.
 MP_ISendRecv         1778                 218624.
 MP_Wait              9728
 MP_ISend             6360                1080477.
 MP_IRecv             6360                1080477.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.036    0.059  288.968  288.975
 qs_mol_dyn_low                       1  2.0    0.003    0.003  288.366  288.394
 qs_forces                           11  3.9    0.005    0.005  288.237  288.242
 qs_energies                         11  4.9    0.003    0.003  279.408  279.415
 scf_env_do_scf                      11  5.9    0.001    0.001  256.790  256.796
 velocity_verlet                     10  3.0    0.002    0.002  208.766  208.778
 scf_env_do_scf_inner_loop          116  6.6    0.004    0.010  132.450  132.451
 init_scf_loop                       11  6.9    0.000    0.000  124.066  124.068
 prepare_preconditioner              11  7.9    0.000    0.000  119.285  119.303
 make_preconditioner                 11  8.9    0.000    0.000  119.285  119.303
 make_full_inverse_cholesky          11  9.9    0.038    0.039   95.623  116.427
 qs_scf_new_mos                     116  7.6    0.001    0.001   89.559   89.646
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   89.558   89.645
 ot_scf_mini                        116  9.6    0.004    0.004   84.710   84.759
 dbcsr_multiply_generic            2494 12.5    0.212    0.224   81.948   82.376
 cp_fm_upper_to_full                105 14.8   53.193   75.755   53.193   75.755
 multiply_cannon                   2494 13.5    0.684    0.725   59.411   60.362
 multiply_cannon_loop              2494 14.5    1.034    1.047   55.434   56.695
 ot_mini                            116 10.6    0.001    0.001   44.608   44.661
 dbcsr_complete_redistribute        395 12.7    3.981    4.033   29.625   42.445
 copy_fm_to_dbcsr                   209 11.7    0.001    0.001   26.099   38.933
 transfer_fm_to_dbcsr                11  9.9    0.030    0.030   23.619   36.326
 mp_alltoall_i22                    714 14.1   21.377   34.343   21.377   34.343
 rebuild_ks_matrix                  127  8.3    0.001    0.001   33.565   33.599
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.017   33.565   33.599
 cp_fm_cholesky_invert               11 10.9   33.364   33.370   33.364   33.370
 mp_waitall_1                    103128 16.8   27.805   31.749   27.805   31.749
 qs_ks_update_qs_env                127  7.6    0.001    0.001   31.126   31.157
 qs_ot_get_p                        127 10.4    0.001    0.001   25.158   25.246
 qs_ot_get_derivative               116 11.6    0.002    0.002   25.099   25.151
 qs_ot_p2m_diag                      83 11.4    0.879    0.885   21.192   21.222
 multiply_cannon_metrocomm3        9976 15.5    0.025    0.027   19.147   20.479
 ot_diis_step                       116 11.6    0.022    0.022   19.478   19.478
 make_m2s                          4988 13.5    0.077    0.080   17.999   19.434
 cp_dbcsr_syevd                      83 12.4    0.006    0.006   19.394   19.398
 make_images                       4988 14.5    3.054    3.259   17.519   18.957
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   18.837   18.939
 apply_single                       127 13.6    0.001    0.001   18.837   18.938
 multiply_cannon_multrec           9976 15.5   10.403   12.338   18.303   18.393
 qs_rho_update_rho_low              127  7.7    0.001    0.001   16.268   16.288
 calculate_rho_elec                 127  8.7    0.477    0.478   16.267   16.288
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   16.197   16.198
 cp_fm_diag_elpa_base                83 14.4   11.815   13.375   16.193   16.194
 sum_up_and_integrate               127 10.3    0.002    0.002   15.887   15.976
 integrate_v_rspace                 127 11.3    0.005    0.005   15.825   15.914
 multiply_cannon_sync_h2d          9976 15.5   14.213   14.230   14.213   14.230
 init_scf_run                        11  5.9    0.000    0.001   12.270   12.271
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   12.270   12.271
 hybrid_alltoall_any               5174 16.4    1.301    3.038   10.150   12.204
 make_images_data                  4988 15.5    0.065    0.071    9.952   12.057
 pw_transfer                       1535 11.6    0.094    0.095   11.217   11.228
 fft_wrap_pw1pw2                   1281 12.7    0.011    0.012   10.981   10.990
 fft_wrap_pw1pw2_140                519 13.2    3.030    3.099    9.794    9.799
 qs_ot_get_derivative_diag           77 12.4    0.002    0.003    9.700    9.741
 dbcsr_mm_accdrv_process          20626 16.0    4.344    6.315    7.651    9.698
 wfi_extrapolate                     11  7.9    0.001    0.001    8.981    8.981
 cp_fm_cholesky_decompose            22 10.9    8.933    8.949    8.933    8.949
 grid_integrate_task_list           127 12.3    8.502    8.695    8.502    8.695
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    8.192    8.193
 density_rs2pw                      127  9.7    0.005    0.006    7.385    7.439
 mp_alltoall_d11v                  2407 14.1    6.821    6.959    6.821    6.959
 calculate_dm_sparse                127  9.5    0.001    0.001    6.708    6.786
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.413    6.461
 grid_collocate_task_list           127  9.7    6.371    6.448    6.371    6.448
 fft3d_ps                          1281 14.7    2.835    2.863    6.256    6.306
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    6.199    6.269
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=288.975000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2882.818182, yerr=160.021486
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.259979E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255646.
 MP_Allreduce         3139                   6114.
 MP_Sync                 4
 MP_Alltoall            54
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.049   85.309   85.310
 qs_energies                          1  2.0    0.000    0.001   84.782   84.803
 ls_scf                               1  3.0    0.005    0.039   83.874   83.887
 dbcsr_multiply_generic             111  6.7    0.014    0.016   72.422   72.587
 multiply_cannon                    111  7.7    0.027    0.078   55.743   57.126
 multiply_cannon_loop               111  8.7    0.226    0.239   52.352   54.020
 ls_scf_main                          1  4.0    0.000    0.002   52.262   52.266
 density_matrix_trs4                  2  5.0    0.002    0.003   46.678   46.753
 ls_scf_init_scf                      1  4.0    0.000    0.001   28.568   28.570
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   27.432   27.482
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   25.241   25.317
 mp_waitall_1                     11031 10.9   22.132   25.216   22.132   25.216
 multiply_cannon_multrec           2664  9.7    8.169    9.032   15.541   17.443
 multiply_cannon_sync_h2d          2664  9.7   13.760   16.394   13.760   16.394
 make_m2s                           222  7.7    0.009    0.011   13.071   13.539
 make_images                        222  8.7    0.098    0.108   13.048   13.521
 multiply_cannon_metrocomm1        2664  9.7    0.010    0.010    9.469   12.536
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.378    8.252
 make_images_data                   222  9.7    0.004    0.005    7.622    8.185
 hybrid_alltoall_any                227 10.6    0.216    1.853    6.533    8.052
 dbcsr_mm_accdrv_process           4760 10.4    0.584    0.691    6.991    7.971
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.207    7.113    6.207    7.113
 calculate_norms                   4752  9.8    5.515    6.189    5.515    6.189
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.063    5.196
 mp_sum_l                           887  5.1    3.089    4.547    3.089    4.547
 multiply_cannon_metrocomm4        2442  9.7    0.012    0.015    2.035    3.795
 make_images_sizes                  222  9.7    0.000    0.000    0.774    3.782
 mp_alltoall_i44                    222 10.7    0.774    3.782    0.774    3.782
 mp_irecv_dv                       6231 10.9    2.019    3.771    2.019    3.771
 arnoldi_extremal                     4  6.8    0.000    0.000    3.545    3.629
 arnoldi_normal_ev                    4  7.8    0.234    0.378    3.545    3.629
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.264    3.480
 build_subspace                      16  8.4    0.009    0.012    3.101    3.103
 ls_scf_post                          1  4.0    0.000    0.001    3.038    3.053
 ls_scf_store_result                  1  5.0    0.000    0.000    2.851    2.899
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.322    2.809
 dbcsr_merge_single_wm              555 10.7    0.452    0.586    2.313    2.799
 make_images_pack                   222  9.7    2.206    2.584    2.208    2.586
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.013    2.311    2.550
 dbcsr_sort_data                    658 11.4    2.118    2.538    2.118    2.538
 dbcsr_matrix_vector_mult_local     304 10.0    2.064    2.455    2.067    2.457
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.308    2.381
 buffer_matrices_ensure_size        222  8.7    1.746    2.057    1.746    2.057
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.765    1.766
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.755    1.757
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.006    1.755    1.757
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=85.310000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1131.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.162135E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  10075.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.064    0.111   92.853   92.853
 qs_energies                          1  2.0    0.000    0.000   92.301   92.305
 ls_scf                               1  3.0    0.000    0.000   90.888   90.892
 dbcsr_multiply_generic             111  6.7    0.015    0.016   76.928   77.289
 multiply_cannon                    111  7.7    0.028    0.041   53.619   57.615
 ls_scf_main                          1  4.0    0.000    0.000   57.326   57.329
 multiply_cannon_loop               111  8.7    0.135    0.146   50.696   55.002
 density_matrix_trs4                  2  5.0    0.002    0.003   51.645   51.865
 mp_waitall_1                      9105 10.9   22.314   32.386   22.314   32.386
 ls_scf_init_scf                      1  4.0    0.000    0.000   29.843   29.845
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.678   28.760
 multiply_cannon_multrec           1332  9.7   13.349   16.885   22.621   27.217
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.266   26.278
 multiply_cannon_metrocomm3        1332  9.7    0.007    0.008   12.043   22.050
 make_m2s                           222  7.7    0.006    0.007   15.763   16.414
 make_images                        222  8.7    1.369    1.692   15.733   16.385
 make_images_data                   222  9.7    0.004    0.005    9.447   10.465
 dbcsr_mm_accdrv_process           4041 10.4    0.364    0.547    8.863   10.454
 hybrid_alltoall_any                227 10.6    0.540    2.544    8.697   10.287
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.380    9.908    8.380    9.908
 mp_sum_l                           887  5.1    6.078    9.012    6.078    9.012
 multiply_cannon_metrocomm4        1221  9.7    0.007    0.009    3.207    7.726
 mp_irecv_dv                       3311 11.0    3.187    7.675    3.187    7.675
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.777    7.330
 calculate_norms                   2376  9.8    6.031    6.750    6.031    6.750
 multiply_cannon_sync_h2d          1332  9.7    4.830    6.181    4.830    6.181
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.917    5.135
 arnoldi_extremal                     4  6.8    0.000    0.000    4.716    4.735
 arnoldi_normal_ev                    4  7.8    0.003    0.009    4.716    4.735
 build_subspace                      16  8.4    0.014    0.021    4.464    4.466
 ls_scf_post                          1  4.0    0.000    0.000    3.718    3.722
 ls_scf_store_result                  1  5.0    0.000    0.000    3.401    3.542
 mp_allgather_i34                   111  8.7    0.916    3.395    0.916    3.395
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.153    3.385
 dbcsr_matrix_vector_mult_local     304 10.0    2.749    3.234    2.751    3.236
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.469    3.138
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.619    2.709
 dbcsr_data_new                    4174 10.1    2.109    2.387    2.109    2.387
 make_images_pack                   222  9.7    1.819    2.128    1.822    2.130
 dbcsr_sort_data                    436 11.2    1.822    2.047    1.822    2.047
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.859    1.861
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=92.853000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1774.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.884362E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265448.
 MP_Allreduce         3138                  10896.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.026    0.042   94.252   94.253
 qs_energies                          1  2.0    0.000    0.000   93.567   93.571
 ls_scf                               1  3.0    0.000    0.000   91.526   91.529
 dbcsr_multiply_generic             111  6.7    0.016    0.019   75.897   76.189
 ls_scf_main                          1  4.0    0.000    0.000   56.863   56.868
 multiply_cannon                    111  7.7    0.032    0.068   52.217   56.332
 multiply_cannon_loop               111  8.7    0.117    0.128   49.441   53.574
 density_matrix_trs4                  2  5.0    0.002    0.003   50.846   50.985
 mp_waitall_1                      7281 11.0   23.734   33.080   23.734   33.080
 ls_scf_init_scf                      1  4.0    0.000    0.000   31.012   31.015
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.680   29.771
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.153   27.168
 multiply_cannon_multrec            888  9.7   12.705   15.325   21.272   24.474
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   10.920   22.762
 make_m2s                           222  7.7    0.006    0.007   16.506   17.258
 make_images                        222  8.7    1.583    1.858   16.468   17.218
 make_images_data                   222  9.7    0.004    0.004    9.588   10.768
 hybrid_alltoall_any                227 10.6    0.641    2.948    9.132   10.447
 dbcsr_mm_accdrv_process           3754 10.4    0.342    0.814    8.098    9.300
 mp_sum_l                           887  5.1    5.455    8.873    5.455    8.873
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.639    8.803    7.639    8.803
 multiply_cannon_sync_h2d           888  9.7    6.097    7.549    6.097    7.549
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.363    7.324
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.872    7.176
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.453    7.158
 mp_irecv_dv                       2335 11.1    2.437    7.108    2.437    7.108
 arnoldi_extremal                     4  6.8    0.000    0.000    5.065    5.083
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.065    5.083
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.760    4.964
 calculate_norms                   1584  9.8    4.346    4.775    4.346    4.775
 build_subspace                      16  8.4    0.014    0.020    4.763    4.769
 mp_allgather_i34                   111  8.7    0.888    3.808    0.888    3.808
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.439    3.760
 ls_scf_post                          1  4.0    0.000    0.000    3.650    3.654
 dbcsr_matrix_vector_mult_local     304 10.0    3.036    3.603    3.038    3.605
 ls_scf_store_result                  1  5.0    0.000    0.000    3.390    3.473
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.918    2.989
 dbcsr_data_new                    4116  9.9    2.112    2.450    2.112    2.450
 make_images_sizes                  222  9.7    0.000    0.000    1.096    2.346
 mp_alltoall_i44                    222 10.7    1.095    2.346    1.095    2.346
 dbcsr_sort_data                    325 11.1    1.876    2.133    1.876    2.133
 qs_energies_init_hamiltonians        1  3.0    0.072    0.076    2.033    2.033
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.029    2.032
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.012    2.014
 qs_ks_build_kohn_sham_matrix         3  8.3    0.136    0.138    2.012    2.014
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=94.253000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2245.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.332628E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  13030.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.034    0.056  101.476  101.477
 qs_energies                          1  2.0    0.003    0.022  100.687  100.739
 ls_scf                               1  3.0    0.000    0.001   98.827   98.863
 dbcsr_multiply_generic             111  6.7    0.017    0.023   81.400   81.692
 ls_scf_main                          1  4.0    0.003    0.023   62.419   62.420
 multiply_cannon                    111  7.7    0.037    0.063   52.955   56.833
 density_matrix_trs4                  2  5.0    0.004    0.027   56.260   56.412
 multiply_cannon_loop               111  8.7    0.152    0.169   47.947   51.287
 ls_scf_init_scf                      1  4.0    0.000    0.001   33.117   33.119
 mp_waitall_1                      6369 11.0   24.965   32.140   24.965   32.140
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   31.677   31.747
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.009   28.960   28.979
 multiply_cannon_multrec           1332  9.7   14.263   17.306   22.173   24.859
 make_m2s                           222  7.7    0.007    0.008   22.132   23.477
 make_images                        222  8.7    3.152    3.607   22.082   23.428
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.004    9.939   19.450
 make_images_data                   222  9.7    0.004    0.005   12.763   14.386
 hybrid_alltoall_any                227 10.6    0.797    3.774   12.114   13.880
 dbcsr_mm_accdrv_process           3641 10.4    0.285    0.477    7.522    9.050
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.083    8.554    7.083    8.554
 mp_sum_l                           887  5.1    4.743    7.785    4.743    7.785
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.727    6.395
 multiply_cannon_sync_h2d          1332  9.7    5.391    6.047    5.391    6.047
 arnoldi_extremal                     4  6.8    0.000    0.000    5.946    5.963
 arnoldi_normal_ev                    4  7.8    0.002    0.009    5.946    5.963
 multiply_cannon_metrocomm4        1110  9.7    0.005    0.007    2.081    5.960
 mp_irecv_dv                       3229 10.9    2.057    5.882    2.057    5.882
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    3.091    5.646
 build_subspace                      16  8.4    0.014    0.021    5.573    5.582
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.782    4.955
 calculate_norms                   2376  9.8    4.168    4.485    4.168    4.485
 mp_allgather_i34                   111  8.7    2.109    4.387    2.109    4.387
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.021    4.008    4.340
 dbcsr_matrix_vector_mult_local     304 10.0    3.211    3.719    3.213    3.721
 dbcsr_sort_data                    658 11.4    3.086    3.353    3.086    3.353
 ls_scf_post                          1  4.0    0.003    0.026    3.291    3.325
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.841    3.075
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.017    3.067
 dbcsr_merge_single_wm              555 10.7    0.542    0.663    2.833    3.067
 ls_scf_store_result                  1  5.0    0.000    0.000    2.974    3.052
 dbcsr_data_release               10477 10.7    1.607    2.441    1.607    2.441
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.043    2.044
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=101.477000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2709.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.751499E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265536.
 MP_Allreduce         3129                  15263.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.115    0.122   92.908   92.909
 qs_energies                          1  2.0    0.000    0.000   92.024   92.030
 ls_scf                               1  3.0    0.000    0.000   90.089   90.094
 dbcsr_multiply_generic             111  6.7    0.017    0.019   71.643   71.906
 ls_scf_main                          1  4.0    0.000    0.000   57.171   57.171
 multiply_cannon                    111  7.7    0.076    0.167   53.132   56.212
 multiply_cannon_loop               111  8.7    0.088    0.093   50.519   52.068
 density_matrix_trs4                  2  5.0    0.002    0.003   50.207   50.271
 ls_scf_init_scf                      1  4.0    0.000    0.000   29.496   29.497
 mp_waitall_1                      5436 11.0   25.003   29.348   25.003   29.348
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.261   28.291
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.027   26.043
 multiply_cannon_multrec            444  9.7   13.707   16.258   20.831   22.301
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   10.976   15.745
 multiply_cannon_metrocomm3         444  9.7    0.001    0.002    6.412   14.732
 make_m2s                           222  7.7    0.005    0.005   13.627   14.584
 make_images                        222  8.7    2.041    2.478   13.560   14.516
 hybrid_alltoall_any                227 10.6    0.801    3.820    8.239    9.883
 make_images_data                   222  9.7    0.003    0.004    8.432    9.867
 multiply_cannon_sync_h2d           444  9.7    6.740    8.158    6.740    8.158
 dbcsr_mm_accdrv_process           3003 10.4    0.420    0.553    6.812    7.956
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.392    7.462    6.392    7.462
 arnoldi_extremal                     4  6.8    0.000    0.000    5.890    5.905
 arnoldi_normal_ev                    4  7.8    0.002    0.005    5.890    5.905
 build_subspace                      16  8.4    0.015    0.020    5.491    5.500
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.591    4.829
 mp_sum_l                           887  5.1    2.878    4.502    2.878    4.502
 dbcsr_matrix_vector_mult           304  9.0    0.011    0.021    4.216    4.415
 dbcsr_matrix_vector_mult_local     304 10.0    3.741    4.209    3.743    4.211
 mp_allgather_i34                   111  8.7    1.197    3.826    1.197    3.826
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.577    3.751
 calculate_norms                    792  9.8    3.615    3.709    3.615    3.709
 mp_irecv_dv                       1241 11.2    1.561    3.708    1.561    3.708
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.501    3.580
 ls_scf_post                          1  4.0    0.000    0.000    3.422    3.428
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    1.999    3.404
 make_images_sizes                  222  9.7    0.000    0.000    0.812    3.254
 mp_alltoall_i44                    222 10.7    0.812    3.254    0.812    3.254
 ls_scf_store_result                  1  5.0    0.000    0.000    3.215    3.244
 dbcsr_finalize                     304  7.8    0.061    0.077    2.196    2.275
 dbcsr_data_new                    4608  9.7    1.788    2.236    1.788    2.236
 dbcsr_merge_all                    275  8.9    0.478    0.521    2.052    2.109
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.004    2.005
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.972    1.973
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    1.972    1.973
 qs_energies_init_hamiltonians        1  3.0    0.001    0.001    1.919    1.919
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=92.909000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3744.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.849891E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284089.
 MP_Allreduce         3123                  21388.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.077    0.089  108.657  108.658
 qs_energies                          1  2.0    0.000    0.000  107.275  107.279
 ls_scf                               1  3.0    0.000    0.000  104.307  104.311
 dbcsr_multiply_generic             111  6.7    0.024    0.027   77.913   78.069
 ls_scf_main                          1  4.0    0.000    0.000   63.316   63.317
 density_matrix_trs4                  2  5.0    0.002    0.003   54.380   54.439
 multiply_cannon                    111  7.7    0.141    0.247   51.951   53.905
 multiply_cannon_loop               111  8.7    0.097    0.098   48.946   49.547
 ls_scf_init_scf                      1  4.0    0.000    0.000   37.208   37.209
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   35.650   35.669
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   32.962   32.972
 mp_waitall_1                      4527 11.1   21.801   25.061   21.801   25.061
 make_m2s                           222  7.7    0.005    0.005   22.297   23.366
 make_images                        222  8.7    3.584    3.897   22.190   23.257
 multiply_cannon_multrec            444  9.7   17.844   18.512   22.480   23.199
 hybrid_alltoall_any                227 10.6    1.656    3.614   12.522   15.240
 make_images_data                   222  9.7    0.003    0.004   12.710   14.831
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.512   13.956
 multiply_cannon_sync_h2d           444  9.7    8.785    8.836    8.785    8.836
 arnoldi_extremal                     4  6.8    0.000    0.000    7.500    7.512
 arnoldi_normal_ev                    4  7.8    0.003    0.009    7.500    7.512
 build_subspace                      16  8.4    0.026    0.036    6.949    6.961
 dbcsr_matrix_vector_mult           304  9.0    0.017    0.034    5.573    5.728
 dbcsr_matrix_vector_mult_local     304 10.0    5.133    5.435    5.135    5.438
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    5.072    5.166
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.851    5.093
 dbcsr_mm_accdrv_process           1814 10.4    0.291    0.359    4.449    4.580
 compute_matrix_preconditioner        1  6.0    0.002    0.002    4.404    4.410
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.113    4.250    4.113    4.250
 acc_transpose_blocks               444  9.7    0.003    0.003    3.466    3.935
 acc_transpose_blocks_kernels       444 10.7    0.006    0.006    3.384    3.850
 jit_kernel_transpose                 1 13.0    3.378    3.845    3.378    3.845
 ls_scf_post                          1  4.0    0.000    0.000    3.783    3.789
 make_images_sizes                  222  9.7    0.000    0.000    1.447    3.554
 mp_alltoall_i44                    222 10.7    1.446    3.554    1.446    3.554
 ls_scf_store_result                  1  5.0    0.000    0.000    3.503    3.544
 mp_allgather_i34                   111  8.7    1.122    3.500    1.122    3.500
 calculate_norms                    792  9.8    3.237    3.273    3.237    3.273
 dbcsr_finalize                     304  7.8    0.082    0.090    3.072    3.173
 dbcsr_merge_all                    275  8.9    0.884    0.915    2.857    2.953
 qs_energies_init_hamiltonians        1  3.0    0.001    0.002    2.937    2.937
 dbcsr_complete_redistribute          5  7.6    1.428    1.470    2.730    2.846
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.399    2.527
 dbcsr_sort_data                    325 11.1    2.439    2.512    2.439    2.512
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.394    2.396
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.328    2.330
 qs_ks_build_kohn_sham_matrix         3  8.3    0.003    0.005    2.328    2.330
 dbcsr_data_new                    6591  9.6    1.855    2.328    1.855    2.328
 dbcsr_new_transposed                 4  7.5    0.241    0.250    2.285    2.308
 dbcsr_frobenius_norm                74  6.6    2.056    2.134    2.199    2.224
 dbcsr_add_d                        103  6.2    0.000    0.000    2.123    2.202
 dbcsr_add_anytype                  103  7.2    0.858    0.890    2.123    2.202
 dbcsr_data_release               12724 10.6    1.978    2.185    1.978    2.185
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=108.658000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=7057.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d_performance_tests/27/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32        7009386627072       0.0%      0.0%    100.0%
 flops     9 x     9 x    32        7335108845568       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        9866241589248       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        9884108906496       0.0%      0.0%    100.0%
 flops    22 x    22 x    32       13354440523776       0.0%      0.0%    100.0%
 flops    32 x    32 x     9       20607185977344       0.0%      0.0%    100.0%
 flops    32 x    32 x    22       25186560638976       0.0%      0.0%    100.0%
 flops     9 x    32 x    32       28458319085568       0.0%      0.0%    100.0%
 flops    22 x    32 x    32       34782389993472       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       42881542373376       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       55680402235392       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       55680402235392       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       72328573419520       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       383.054662E+12       0.0%      0.0%    100.0%
 flops max/rank                    733.641090E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                        26899403712       0.0%      0.0%    100.0%
 number of processed stacks             118860288       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     226.3
 marketing flops                   780.439111E+12
 -------------------------------------------------------------------------------
 # multiplications                           1445
 max memory usage/rank             593.154048E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged               102097920
 MPI messages size (bytes):
  total size                        37.227590E+12
  min size                           0.000000E+00
  max size                           4.551360E+06
  average size                     364.626312E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              731472                        0
       128 < size <=     8192            11922720              97670922240
      8192 < size <=    32768            24718992             614677610496
     32768 < size <=   131072            20000256            1970081366016
    131072 < size <=  4194304            42515668           24886801223040
   4194304 < size <= 16777216             2208812            9656099886720
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4640                  78072.
 MP_Allreduce        13232                   2081.
 MP_Sync              1064
 MP_Alltoall          2588                4807347.
 MP_SendRecv        168740                  11136.
 MP_ISendRecv        92040                  11136.
 MP_Wait            102830
 MP_comm_split          40
 MP_ISend            26090                  85106.
 MP_IRecv            37890                  59644.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.026    0.059  229.667  229.687
 qs_mol_dyn_low                       1  2.0    0.003    0.019  228.705  228.735
 qs_forces                            5  3.8    0.010    0.391  228.580  228.588
 qs_energies                          5  4.8    0.002    0.020  225.251  225.271
 scf_env_do_scf                       5  5.8    0.000    0.009  209.978  209.998
 scf_env_do_scf_inner_loop          105  6.6    0.002    0.011  183.028  183.044
 qs_scf_new_mos                     105  7.6    0.001    0.001  142.748  142.909
 qs_scf_loop_do_ot                  105  8.6    0.001    0.001  142.747  142.909
 dbcsr_multiply_generic            1445 12.2    0.126    0.134  133.734  134.126
 ot_scf_mini                        105  9.6    0.003    0.004  132.846  132.966
 multiply_cannon                   1445 13.2    0.276    0.290  114.906  117.101
 multiply_cannon_loop              1445 14.2    2.856    3.017  113.139  114.253
 velocity_verlet                      4  3.0    0.003    0.014  106.197  106.206
 ot_mini                            105 10.6    0.001    0.001   60.058   60.165
 multiply_cannon_multrec          69360 15.2   29.663   34.742   39.812   45.244
 qs_ot_get_p                        112 10.4    0.001    0.002   42.922   43.190
 mp_waitall_1                    488190 16.1   34.688   41.763   34.688   41.763
 qs_ot_get_derivative                55 11.6    0.001    0.001   38.180   38.310
 multiply_cannon_sync_h2d         69360 15.2   29.145   33.305   29.145   33.305
 multiply_cannon_metrocomm3       69360 15.2    0.200    0.211   25.604   33.147
 qs_ot_p2m_diag                      40 11.0    0.020    0.031   31.924   32.006
 rebuild_ks_matrix                  110  8.4    0.000    0.000   29.108   29.271
 qs_ks_build_kohn_sham_matrix       110  9.4    0.011    0.016   29.107   29.271
 cp_dbcsr_syevd                      40 12.0    0.002    0.002   28.693   28.696
 qs_ks_update_qs_env                112  7.6    0.001    0.001   26.798   26.947
 init_scf_loop                        7  6.6    0.001    0.043   26.916   26.919
 cp_fm_syevd                         40 13.0    0.000    0.000   23.503   23.653
 apply_preconditioner_dbcsr          62 12.6    0.000    0.000   23.099   23.326
 apply_single                        62 13.6    0.000    0.000   23.099   23.326
 prepare_preconditioner               7  7.6    0.000    0.001   22.018   22.052
 make_preconditioner                  7  8.6    0.000    0.002   22.018   22.052
 ot_new_cg_direction                 55 11.6    0.001    0.001   21.095   21.098
 cp_fm_redistribute_end              40 14.0    9.402   18.757    9.407   18.759
 cp_fm_syevd_base                    40 14.0    9.344   18.702    9.344   18.702
 qs_rho_update_rho_low              110  7.6    0.001    0.001   16.872   17.270
 calculate_rho_elec                 110  8.6    0.030    0.032   16.872   17.270
 make_full_inverse_cholesky           7  9.6    0.000    0.000   14.791   14.857
 qs_ot_get_orbitals                 105 10.6    0.001    0.001   14.649   14.813
 qs_ot_get_derivative_taylor         37 12.8    0.001    0.001   13.625   13.750
 mp_sum_l                          4764 12.2   11.911   12.758   11.911   12.758
 init_scf_run                         5  5.8    0.000    0.001   12.451   12.452
 scf_env_initial_rho_setup            5  6.8    0.000    0.001   12.451   12.452
 pw_transfer                       1645 12.4    0.080    0.103   11.769   12.013
 fft_wrap_pw1pw2                   1425 13.5    0.012    0.016   11.630   11.878
 density_rs2pw                      110  9.6    0.005    0.007   11.143   11.739
 calculate_dm_sparse                110  9.5    0.000    0.001   11.570   11.729
 dbcsr_mm_accdrv_process         154766 15.8    6.220    6.449   10.017   11.066
 qs_vxc_create                      110 10.4    0.002    0.004   10.521   10.571
 fft_wrap_pw1pw2_240                915 15.0    1.143    1.242   10.244   10.467
 cp_fm_cholesky_invert                7 10.6   10.339   10.349   10.339   10.349
 qs_ot_get_derivative_diag           18 12.0    0.000    0.001   10.265   10.332
 check_diag                          80 13.5    8.598    8.892    9.517    9.685
 fft3d_pb                           915 16.0    2.394    2.677    8.445    8.741
 sum_up_and_integrate                60 10.3    0.001    0.002    8.557    8.569
 acc_transpose_blocks             69360 15.2    0.353    0.369    8.109    8.568
 integrate_v_rspace                  60 11.3    0.002    0.003    8.540    8.552
 transfer_rs2pw                     445 10.6    0.007    0.009    7.868    8.486
 cp_dbcsr_sm_fm_multiply             15  9.3    0.001    0.001    7.963    7.979
 calculate_first_density_matrix       1  7.0    0.000    0.003    7.639    7.651
 xc_rho_set_and_dset_create         110 12.4    0.077    0.099    7.357    7.613
 cp_dbcsr_sm_fm_multiply_core        15 10.3    0.000    0.000    7.476    7.539
 xc_vxc_pw_create                    60 11.3    0.039    0.050    7.021    7.070
 make_m2s                          2890 13.2    0.078    0.087    6.426    7.067
 make_full_single_inverse             7  9.6    0.001    0.001    6.953    6.988
 make_images                       2890 14.2    0.240    0.260    6.320    6.961
 multiply_cannon_metrocomm1       69360 15.2    0.096    0.102    4.512    6.946
 xc_pw_derive                       510 13.4    0.005    0.007    6.260    6.342
 mp_alltoall_z22v                  2340 17.7    5.478    5.736    5.478    5.736
 acc_transpose_blocks_kernels     69360 16.2    0.846    0.896    5.353    5.684
 mp_waitany                        7680 13.5    4.473    5.372    4.473    5.372
 multiply_cannon_metrocomm4       67915 15.2    0.184    0.202    2.021    4.842
 potential_pw2rs                     60 12.3    0.003    0.003    4.790    4.815
 jit_kernel_transpose                 5 15.0    4.506    4.811    4.506    4.811
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="601", plot="h2o_512_md", label="(64n/12r/1t)", y=229.687000, yerr=0.000000
PlotPoint: name="602", plot="h2o_512_md_mem", label="(64n/12r/1t)", y=562.600000, yerr=3.322650
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: 7c90856a675bdfbc0d30dfa5c6a14958c10a4a2d
Summary: empty
Status: OK