=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: fb438e6dbebd23a036a1436a9c376208ffa38d38


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1, Cray-FFTW 3.3.8.10,
#              COSMA 2.6.2, ELPA 2021.11.002, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 5.2.3, LIBVORI 220621, LIBXSMM 1.17, PLUMED 2.8.0,
#              SIRIUS 7.3.2, SPGLIB 1.16.2
#
# Usage: Source this arch file from the directory that contains tools/toolchain
#        and then run make as instructed.
#        An optional first argument selects the gcc module version
#        (default: 9.3.0), e.g. "source <this file> 9.3.0".
#        A full toolchain installation is performed as default.
#        Replace or adapt the "module add" commands below if needed.
#
# Author: Matthias Krack (15.09.2022)
#
# The indented block below is shell code. For make it is one long comment,
# because the comment line that follows ends with a backslash and every
# subsequent line continues it. A shell sourcing this file executes the block
# and leaves via "return" before reaching the make syntax further down.
# Every line of the block except the final "return" must therefore keep its
# trailing backslash.
#
# Notes on the individual steps:
#  - The toolchain installer runs in a subshell, so the caller's working
#    directory is left untouched even if tools/toolchain does not exist.
#  - The path of the toolchain setup file is quoted and passed to printf as an
#    argument, so blanks or "%" characters in the path are handled correctly.
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   gcc_version="${1:-9.3.0}"; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add "gcc/${gcc_version}"; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   (cd tools/toolchain && ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed); \
   printf "Sourcing %s ... " "${PWD}/tools/toolchain/install/setup"; \
   source "${PWD}/tools/toolchain/install/setup"; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   return

# ---------------------------------------------------------------------------
# User-tunable options
# ---------------------------------------------------------------------------

# Build switches: "yes" enables the feature, anything else disables it.
DO_CHECKS := no
USE_ACC := yes

# Accelerator settings
GPUVER := P100
OFFLOAD_TARGET := cuda

# Integral limits (LMAX selects the libint build, MAX_CONTR becomes -D__MAX_CONTR)
LMAX := 5
MAX_CONTR := 4

# Optional libraries: the value is the version string used in the install
# directory name. Leave a variable empty or comment it out to disable the
# corresponding library (QUIP is disabled here).
USE_COSMA := 2.6.2
USE_ELPA := 2021.11.002
USE_LIBINT := 2.6.0
USE_LIBPEXSI := 1.2.0
USE_LIBVORI := 220621
USE_LIBXC := 5.2.3
USE_LIBXSMM := 1.17
USE_PLUMED := 2.8.0
#USE_QUIP       := b4336484fb65b0e73211a8f920ae4361c7c353fd
USE_SIRIUS := 7.3.2
USE_SPGLIB := 1.16.2

# Versions of the SIRIUS dependencies (only read when USE_SIRIUS is set)
HDF5_VER := 1.12.0
LIBVDWXC_VER := 0.4.0
SPFFT_VER := 1.0.6
SPLA_VER := 1.5.4

# Versions of the LIBPEXSI dependencies (only read when USE_LIBPEXSI is set)
SCOTCH_VER := 6.0.0
SUPERLU_VER := 6.1.0

# Compiler drivers: the Cray wrappers (cc, CC, ftn) are used for host code and
# for linking, nvcc for the offload (GPU) sources.
CC := cc
CXX := CC
FC := ftn
LD := ftn
OFFLOAD_CC := nvcc
AR := ar -r

# Root of the toolchain installation all library paths below are derived from.
# NOTE: this deliberately reads PWD (inherited from the environment), which is
# not the same as make's own CURDIR when make changes directory.
INSTALL_PATH := $(PWD)/tools/toolchain/install

# Baseline optimisation flags.
# The cc, CC, and ftn wrappers already include the proper -march flag.
CFLAGS := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g

# Baseline preprocessor defines; the optional sections below append to these.
DFLAGS := -D__parallel -D__SCALAPACK -D__FFTW3 -D__MPI_VERSION=3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# Extra diagnostics, enabled with DO_CHECKS=yes
ifeq ($(DO_CHECKS), yes)
  DFLAGS += -D__CHECK_DIAG
endif

# GPU acceleration: DBCSR on the accelerator and CUDA offload.
# PW offload is switched off explicitly (possibly no performance gain with
# PW_CUDA currently).
ifeq ($(USE_ACC), yes)
  DFLAGS += -D__DBCSR_ACC -D__OFFLOAD_CUDA -D__NO_OFFLOAD_PW
endif

# PLUMED: static library only, no include path is added.
# Setting USE_GSL here activates the GSL section further down.
ifneq ($(USE_PLUMED),)
  USE_GSL := 2.7
  USE_PLUMED := $(strip $(USE_PLUMED))
  PLUMED_LIB := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
  LIBS += $(PLUMED_LIB)/libplumed.a
  DFLAGS += -D__PLUMED2
endif

# ELPA: the install tree holds one sub-directory per build flavour; TARGET
# names the flavour whose headers, Fortran modules and static library are used.
ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
   # Follow USE_ACC like the SIRIUS and COSMA sections do, instead of always
   # using the GPU flavour. With USE_ACC = yes the result is unchanged.
   # NOTE(review): assumes the toolchain also installs a "cpu" flavour next to
   # "nvidia" under elpa-<version>/ -- confirm against the toolchain install.
   ifeq ($(USE_ACC), yes)
      TARGET         := nvidia
   else
      TARGET         := cpu
   endif
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   DFLAGS         += -D__ELPA
   # The GPU define is only added for the GPU flavour
   ifeq ($(TARGET), nvidia)
      DFLAGS         += -D__ELPA_NVIDIA_GPU
   endif
   LIBS           += $(ELPA_LIB)/libelpa.a
endif

# QUIP: only active when USE_QUIP is set to a non-empty value.
# The libraries are listed in link order.
ifneq ($(USE_QUIP),)
  USE_QUIP := $(strip $(USE_QUIP))
  QUIP_INC := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
  QUIP_LIB := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
  DFLAGS += -D__QUIP
  CFLAGS += -I$(QUIP_INC)
  LIBS += $(addprefix $(QUIP_LIB)/,libquip_core.a libatoms.a libFoX_sax.a libFoX_common.a libFoX_utils.a libFoX_fsys.a)
endif

# LIBPEXSI together with its dependencies SuperLU_DIST and (PT-)SCOTCH.
# Link order: PEXSI first, then SuperLU_DIST, then the SCOTCH libraries.
ifneq ($(USE_LIBPEXSI),)
  USE_LIBPEXSI := $(strip $(USE_LIBPEXSI))
  LIBPEXSI_INC := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
  LIBPEXSI_LIB := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib

  SCOTCH_VER := $(strip $(SCOTCH_VER))
  SCOTCH_INC := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
  SCOTCH_LIB := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib

  SUPERLU_VER := $(strip $(SUPERLU_VER))
  SUPERLU_INC := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
  SUPERLU_LIB := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib

  DFLAGS += -D__LIBPEXSI
  CFLAGS += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC)
  LIBS += $(LIBPEXSI_LIB)/libpexsi.a $(SUPERLU_LIB)/libsuperlu_dist.a
  LIBS += $(addprefix $(SCOTCH_LIB)/,libptscotchparmetis.a libptscotch.a libptscotcherr.a libscotchmetis.a libscotch.a)
endif

# LIBVORI: static library only, no include path is added.
ifneq ($(USE_LIBVORI),)
  USE_LIBVORI := $(strip $(USE_LIBVORI))
  DFLAGS += -D__LIBVORI
  LIBVORI_LIB := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
  LIBS += $(LIBVORI_LIB)/libvori.a
endif

# LIBXC: libxcf03.a is listed before libxc.a on the link line.
ifneq ($(USE_LIBXC),)
  USE_LIBXC := $(strip $(USE_LIBXC))
  DFLAGS += -D__LIBXC
  LIBXC_INC := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
  CFLAGS += -I$(LIBXC_INC)
  LIBXC_LIB := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
  LIBS += $(addprefix $(LIBXC_LIB)/,libxcf03.a libxc.a)
endif

# LIBINT: the install directory name encodes both the library version and LMAX.
ifneq ($(USE_LIBINT),)
  LMAX := $(strip $(LMAX))
  USE_LIBINT := $(strip $(USE_LIBINT))
  DFLAGS += -D__LIBINT
  LIBINT_INC := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
  CFLAGS += -I$(LIBINT_INC)
  LIBINT_LIB := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
  LIBS += $(LIBINT_LIB)/libint2.a
endif

# SPGLIB: adds its include path, the __SPGLIB define and libsymspg.a.
ifneq ($(USE_SPGLIB),)
  USE_SPGLIB := $(strip $(USE_SPGLIB))
  DFLAGS += -D__SPGLIB
  SPGLIB_INC := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
  CFLAGS += -I$(SPGLIB_INC)
  SPGLIB_LIB := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
  LIBS += $(SPGLIB_LIB)/libsymspg.a
endif

# LIBXSMM: libxsmmf.a is listed before libxsmm.a on the link line.
ifneq ($(USE_LIBXSMM),)
  USE_LIBXSMM := $(strip $(USE_LIBXSMM))
  DFLAGS += -D__LIBXSMM
  LIBXSMM_INC := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
  CFLAGS += -I$(LIBXSMM_INC)
  LIBXSMM_LIB := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
  LIBS += $(addprefix $(LIBXSMM_LIB)/,libxsmmf.a libxsmm.a)
endif

# SIRIUS together with its dependencies HDF5, libvdwxc, SpFFT and SpLA.
# With USE_ACC = yes the "cuda" sub-directories of SpFFT, SpLA and SIRIUS are
# used and GEMM offload is enabled; otherwise the plain directories are used.
ifneq ($(USE_SIRIUS),)
  USE_SIRIUS := $(strip $(USE_SIRIUS))
  HDF5_VER := $(strip $(HDF5_VER))
  LIBVDWXC_VER := $(strip $(LIBVDWXC_VER))
  SPFFT_VER := $(strip $(SPFFT_VER))
  SPLA_VER := $(strip $(SPLA_VER))

  # Locations that do not depend on the acceleration setting
  HDF5_LIB := $(INSTALL_PATH)/hdf5-$(HDF5_VER)/lib
  LIBVDWXC_INC := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
  LIBVDWXC_LIB := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
  SPFFT_INC := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
  SPLA_INC := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla

  # Locations that depend on the acceleration setting
  ifeq ($(USE_ACC), yes)
    DFLAGS += -D__OFFLOAD_GEMM
    SIRIUS_INC := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/cuda
    SIRIUS_LIB := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib/cuda
    SPFFT_LIB := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib/cuda
    SPLA_LIB := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib/cuda
  else
    SIRIUS_INC := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
    SIRIUS_LIB := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
    SPFFT_LIB := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
    SPLA_LIB := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
  endif

  CFLAGS += -I$(LIBVDWXC_INC) -I$(SPFFT_INC) -I$(SPLA_INC) -I$(SIRIUS_INC)
  DFLAGS += -D__HDF5 -D__LIBVDWXC -D__SPFFT -D__SPLA -D__SIRIUS
  # Link order: SIRIUS first, followed by the libraries it depends on
  LIBS += $(SIRIUS_LIB)/libsirius.a $(SPLA_LIB)/libspla.a $(SPFFT_LIB)/libspfft.a
  LIBS += $(LIBVDWXC_LIB)/libvdwxc.a $(HDF5_LIB)/libhdf5.a
endif

# COSMA: with USE_ACC = yes the version string gets a "-cuda" suffix, so the
# paths below point into the COSMA-<version>-cuda install directory.
ifneq ($(USE_COSMA),)
  USE_COSMA := $(strip $(USE_COSMA))
  ifeq ($(USE_ACC), yes)
    USE_COSMA := $(USE_COSMA)-cuda
  endif
  DFLAGS += -D__COSMA
  COSMA_INC := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
  CFLAGS += -I$(COSMA_INC)
  COSMA_LIB := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
  LIBS += $(addprefix $(COSMA_LIB)/,libcosma_prefixed_pxgemm.a libcosma.a libcosta_prefixed_scalapack.a libcosta.a libTiled-MM.a)
endif

# GSL: USE_GSL is not a user option in this file; it is set by the PLUMED
# section when PLUMED is enabled.
ifneq ($(USE_GSL),)
  USE_GSL := $(strip $(USE_GSL))
  DFLAGS += -D__GSL
  GSL_INC := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
  CFLAGS += -I$(GSL_INC)
  GSL_LIB := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
  LIBS += $(GSL_LIB)/libgsl.a
endif

# All preprocessor defines collected above become part of the C flags; the
# C++, Fortran and linker flags are derived from the result.
CFLAGS += $(DFLAGS)

CXXFLAGS := $(CFLAGS) -std=c++11

# nvcc gets the defines only, plus its own optimisation and architecture flags
OFFLOAD_FLAGS := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# Fortran flags: -fallow-argument-mismatch is added only if the major version
# reported by "gcc -dumpversion" is greater than 9.
FCFLAGS := $(CFLAGS)
ifeq ($(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes), yes)
  FCFLAGS += -fallow-argument-mismatch
endif
FCFLAGS += -fbacktrace -ffree-form -ffree-line-length-none -fno-omit-frame-pointer -std=f2008

# Linker flags start from the Fortran flags. If CUDA_HOME is set, its lib64
# directory is added to both the library search path and the runtime path.
LDFLAGS := $(FCFLAGS)
ifneq ($(CUDA_HOME),)
  CUDA_LIB := $(CUDA_HOME)/lib64
  LDFLAGS += -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA runtime and math libraries, followed by the system libraries
LIBS += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt
LIBS += -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/01
 job id: 41477302
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/02
 job id: 41477305
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/03
 job id: 41477306
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/04
 job id: 41477308
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/05
 job id: 41477309
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/06
 job id: 41477310
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/07
 job id: 41477313
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/08
 job id: 41477314
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/09
 job id: 41477316
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/10
 job id: 41477317
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/11
 job id: 41477319
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/12
 job id: 41477320
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/13
 job id: 41477321
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/14
 job id: 41477324
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/15
 job id: 41477325
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/16
 job id: 41477326
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/17
 job id: 41477327
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/18
 job id: 41477328
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/19
 job id: 41477330
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/20
 job id: 41477331
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/21
 job id: 41477332
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/22
 job id: 41477334
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/23
 job id: 41477335
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/24
 job id: 41477338
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/25
 job id: 41477339
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/26
 job id: 41477340
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          344                      9.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.028    0.036  134.131  134.131
 farming_run                          1  2.0  133.641  133.643  134.097  134.101
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.449902E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              228                1113141.
 MP_Allreduce          489                2263609.
 MP_Sync                27
 MP_Alltoall            38                9316958.
 MP_SendRecv            30                 829726.
 MP_ISendRecv          135                 235435.
 MP_Wait               281
 MP_comm_split           8
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.026  116.815  116.816
 qs_energies                          1  2.0    0.000    0.000  116.604  116.607
 mp2_main                             1  3.0    0.000    0.000  114.718  114.721
 mp2_gpw_main                         1  4.0    0.020    0.025  113.885  113.889
 mp2_ri_gpw_compute_in                1  5.0    0.171    0.172   94.805   94.888
 mp2_ri_gpw_compute_in_loop           1  6.0    0.004    0.004   56.195   56.278
 mp2_eri_3c_integrate_gpw           272  7.0    0.151    0.162   42.477   47.409
 get_2c_integrals                     1  6.0    0.000    0.000   37.566   38.438
 integrate_v_rspace                 273  8.0    0.439    0.452   25.147   29.724
 pw_transfer                       6555 10.6    0.374    0.391   27.522   28.039
 fft_wrap_pw1pw2                   5465 11.4    0.045    0.046   26.158   26.468
 grid_integrate_task_list           273  9.0   20.902   25.981   20.902   25.981
 fft_wrap_pw1pw2_100               2178 12.4    1.206    1.422   23.724   24.056
 compute_2c_integrals                 1  7.0    0.002    0.002   20.334   20.335
 compute_2c_integrals_loop_lm         1  8.0    0.003    0.004   19.640   20.043
 mp2_eri_2c_integrate_gpw             1  9.0    2.383    2.436   19.637   20.040
 rpa_ri_compute_en                    1  5.0    0.000    0.000   18.971   19.042
 cp_fm_cholesky_decompose            12  8.2   17.215   18.139   17.215   18.139
 cholesky_decomp                      1  7.0    0.000    0.000   16.078   16.974
 fft3d_s                           5443 13.4   16.166   16.426   16.187   16.448
 ao_to_mo_and_store_B_mult_1        272  7.0   10.872   15.588   10.872   15.588
 calculate_wavefunction             272  8.0    5.414    5.499   12.546   13.248
 rpa_num_int                          1  6.0    0.000    0.000   10.588   10.588
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.518   10.546
 calc_potential_gpw                 544  9.5    0.005    0.006    9.684   10.044
 calc_mat_Q                           8  8.0    0.000    0.000    9.332    9.417
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.001    8.943    9.205
 contract_S_to_Q                      8  9.0    0.000    0.000    8.752    8.837
 potential_pw2rs                    545 10.0    0.106    0.108    7.744    8.442
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.332    8.422
 parallel_gemm_fm_cosma              14 10.1    8.332    8.422    8.332    8.422
 create_integ_mat                     1  6.0    0.023    0.029    8.206    8.206
 collocate_single_gaussian          272 10.0    0.040    0.042    7.491    7.729
 array2fm                             1  7.0    0.000    0.000    7.162    7.685
 pw_scatter_s                      2720 13.7    4.447    4.599    4.447    4.599
 pw_gather_s                       2722 13.2    3.978    4.451    3.978    4.451
 pw_poisson_solve                   545 10.5    2.488    2.529    3.558    3.735
 array2fm_buffer_send                 1  8.0    3.206    3.477    3.206    3.477
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=113.888594, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2721.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          344                     10.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.028    0.038  397.025  397.027
 farming_run                          1  2.0  396.360  396.364  396.987  396.989
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827141120       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788822       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.227268E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              703                 408373.
 MP_Allreduce         1825                  23678.
 MP_Sync                38
 MP_Alltoall            77
 MP_SendRecv          2171                2843495.
 MP_ISendRecv         1739                 144022.
 MP_Wait              2051
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.014    0.047  210.485  210.486
 qs_energies                          1  2.0    0.000    0.002  210.202  210.235
 scf_env_do_scf                       1  3.0    0.000    0.000  106.170  106.170
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  105.300  105.307
 rebuild_ks_matrix                    4  6.0    0.000    0.000  105.298  105.306
 qs_ks_build_kohn_sham_matrix         4  7.0    0.055    0.062  105.298  105.306
 hfx_ks_matrix                        4  8.0    0.001    0.001  104.913  104.916
 integrate_four_center                4  9.0    0.144    0.457  104.913  104.916
 mp2_main                             1  3.0    0.003    0.026  103.737  103.768
 mp2_gpw_main                         1  4.0    0.035    0.080  102.837  102.847
 integrate_four_center_main           4 10.0    0.104    0.490   96.918   99.336
 integrate_four_center_bin          263 11.0   96.813   98.978   96.813   98.978
 init_scf_loop                        1  4.0    0.000    0.000   92.066   92.066
 mp2_ri_gpw_compute_in                1  5.0    0.064    0.064   75.885   76.968
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.002   54.946   56.029
 mp2_eri_3c_integrate_gpw            91  7.0    0.145    0.161   42.668   47.753
 integrate_v_rspace                  95  8.0    0.399    0.569   28.618   33.513
 pw_transfer                       2240 10.6    0.146    0.160   30.167   30.599
 fft_wrap_pw1pw2                   1868 11.4    0.018    0.020   29.140   29.610
 grid_integrate_task_list            95  9.0   23.889   28.963   23.889   28.963
 ao_to_mo_and_store_B_mult_1         91  7.0   10.586   28.903   10.586   28.903
 mp2_ri_gpw_compute_en                1  5.0    0.057    0.083   26.802   28.519
 fft_wrap_pw1pw2_100                730 12.4    1.286    1.456   26.876   27.295
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.846    1.904   25.063   25.074
 get_2c_integrals                     1  6.0    0.001    0.008   20.830   20.876
 compute_2c_integrals                 1  7.0    0.003    0.004   19.804   19.810
 compute_2c_integrals_loop_lm         1  8.0    0.001    0.002   19.343   19.671
 mp2_eri_2c_integrate_gpw             1  9.0    1.731    1.859   19.342   19.671
 fft3d_s                           1823 13.4   18.527   18.870   18.540   18.885
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.000   14.102   14.103
 calculate_wavefunction              91  8.0    2.022    2.055    9.796   10.092
 potential_pw2rs                    186 10.0    0.033    0.034    8.709    9.448
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.556    0.595    8.775    9.323
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.001    8.661    9.071
 calc_potential_gpw                 182  9.5    0.002    0.002    8.654    8.952
 local_gemm                         172  8.0    8.219    8.749    8.219    8.749
 mp2_ri_gpw_compute_en_comm          22  7.0    0.504    0.519    8.055    8.404
 collocate_single_gaussian           91 10.0    0.017    0.022    7.943    8.230
 mp2_ri_gpw_compute_en_ener         172  7.0    6.341    6.420    6.341    6.420
 mp_sendrecv_dm3                   2068  8.0    6.074    6.404    6.074    6.404
 mp_sync                             38 10.4    2.669    6.109    2.669    6.109
 pw_gather_s                        912 13.2    4.989    5.477    4.989    5.477
 pw_scatter_s                       910 13.7    3.979    4.219    3.979    4.219
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=102.830795, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1514.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             450.322432E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62385.
 MP_Allreduce        10249                    271.
 MP_Sync               580
 MP_Alltoall          2083
 MP_ISendRecv        45220                   5520.
 MP_Wait             60486
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.040    0.177   52.583   52.584
 qs_mol_dyn_low                       1  2.0    0.005    0.020   52.085   52.098
 qs_forces                           11  3.9    0.005    0.031   51.992   51.994
 qs_energies                         11  4.9    0.010    0.069   50.484   50.492
 scf_env_do_scf                      11  5.9    0.000    0.001   44.138   44.138
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   42.054   42.054
 dbcsr_multiply_generic            2286 12.5    0.093    0.098   33.395   33.903
 qs_scf_new_mos                     108  7.5    0.000    0.001   31.822   32.108
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   31.822   32.108
 ot_scf_mini                        108  9.5    0.002    0.002   30.169   30.327
 multiply_cannon                   2286 13.5    0.180    0.189   25.991   27.789
 multiply_cannon_loop              2286 14.5    1.475    1.552   25.282   27.022
 velocity_verlet                     10  3.0    0.009    0.018   25.365   25.368
 ot_mini                            108 10.5    0.001    0.002   19.331   19.569
 qs_ot_get_derivative               108 11.5    0.001    0.002   16.407   16.617
 mp_waitall_1                    267858 16.1    8.746   15.197    8.746   15.197
 multiply_cannon_metrocomm3       54864 15.5    0.066    0.071    5.975   13.455
 multiply_cannon_multrec          54864 15.5    4.238    6.609    7.716   11.154
 rebuild_ks_matrix                  119  8.3    0.000    0.000    8.020    8.160
 qs_ks_build_kohn_sham_matrix       119  9.3    0.010    0.015    8.020    8.160
 multiply_cannon_sync_h2d         54864 15.5    5.891    7.385    5.891    7.385
 mp_sum_l                          7207 12.9    5.461    7.263    5.461    7.263
 qs_ks_update_qs_env                119  7.6    0.001    0.001    7.087    7.214
 qs_ot_get_p                        119 10.4    0.001    0.003    6.257    6.592
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    5.455    5.903
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    5.584    5.678
 init_scf_run                        11  5.9    0.000    0.001    4.809    4.809
 scf_env_initial_rho_setup           11  6.9    0.000    0.002    4.809    4.809
 dbcsr_mm_accdrv_process          76910 16.1    1.150    1.814    3.402    4.731
 sum_up_and_integrate               119 10.3    0.008    0.012    4.684    4.692
 integrate_v_rspace                 119 11.3    0.002    0.003    4.676    4.685
 qs_rho_update_rho_low              119  7.7    0.000    0.001    4.168    4.274
 calculate_rho_elec                 119  8.7    0.011    0.016    4.168    4.274
 qs_ot_p2m_diag                      50 11.0    0.004    0.007    3.449    3.522
 multiply_cannon_metrocomm1       54864 15.5    0.052    0.058    1.798    3.371
 rs_pw_transfer                     974 11.9    0.012    0.013    3.179    3.314
 calculate_dm_sparse                119  9.5    0.000    0.000    2.974    3.123
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.884    3.074
 apply_single                       119 13.6    0.000    0.000    2.884    3.074
 jit_kernel_multiply                 13 15.8    2.192    2.860    2.192    2.860
 calculate_first_density_matrix       1  7.0    0.001    0.003    2.696    2.699
 ot_diis_step                       108 11.5    0.006    0.009    2.676    2.676
 cp_dbcsr_syevd                      50 12.0    0.002    0.003    2.610    2.611
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.386    2.466
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.377    2.377
 density_rs2pw                      119  9.7    0.004    0.004    2.285    2.374
 cp_fm_redistribute_end              50 14.0    2.159    2.355    2.164    2.357
 cp_fm_diag_elpa_base                50 14.0    0.191    2.310    0.192    2.323
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.274    2.276
 acc_transpose_blocks             54864 15.5    0.222    0.241    1.740    2.189
 grid_integrate_task_list           119 12.3    2.028    2.118    2.028    2.118
 potential_pw2rs                    119 12.3    0.004    0.004    2.069    2.092
 init_scf_loop                       11  6.9    0.000    0.001    2.066    2.066
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.019    2.058
 wfi_extrapolate                     11  7.9    0.001    0.002    2.057    2.057
 mp_sum_d                          4129 12.0    1.395    2.029    1.395    2.029
 make_m2s                          4572 13.5    0.053    0.055    1.728    1.771
 pw_transfer                       1439 11.6    0.051    0.055    1.626    1.701
 make_images                       4572 14.5    0.133    0.138    1.647    1.689
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.007    1.552    1.629
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.485    1.497
 mp_alltoall_d11v                  2130 13.8    1.294    1.475    1.294    1.475
 mp_waitany                       12084 13.8    1.203    1.416    1.203    1.416
 fft3d_ps                          1201 14.6    0.356    0.458    1.326    1.396
 grid_collocate_task_list           119  9.7    1.288    1.356    1.288    1.356
 fft_wrap_pw1pw2_140                487 13.2    0.083    0.096    1.182    1.259
 dbcsr_dot_sd                      1205 11.9    0.047    0.058    0.762    1.159
 acc_transpose_blocks_kernels     54864 16.5    0.246    0.380    0.780    1.064
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=52.584000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=429.090909, yerr=1.239835
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             487.202816E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62664.
 MP_Allreduce        10226                    305.
 MP_Sync               104
 MP_Alltoall          2060                2017496.
 MP_ISendRecv        33558                  37093.
 MP_Wait             40318
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.079    0.137   38.248   38.250
 qs_mol_dyn_low                       1  2.0    0.006    0.016   37.876   37.883
 qs_forces                           11  3.9    0.002    0.008   37.752   37.755
 qs_energies                         11  4.9    0.010    0.043   36.054   36.060
 scf_env_do_scf                      11  5.9    0.000    0.001   30.647   30.648
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   28.207   28.209
 dbcsr_multiply_generic            2286 12.5    0.100    0.103   21.114   21.480
 qs_scf_new_mos                     108  7.5    0.001    0.001   19.406   19.650
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   19.405   19.649
 ot_scf_mini                        108  9.5    0.003    0.004   18.519   18.693
 velocity_verlet                     10  3.0    0.012    0.042   17.889   17.890
 multiply_cannon                   2286 13.5    0.207    0.216   16.226   17.772
 multiply_cannon_loop              2286 14.5    0.898    0.973   15.159   16.515
 ot_mini                            108 10.5    0.001    0.002   11.610   11.844
 mp_waitall_1                    217478 16.2    5.793   10.949    5.793   10.949
 multiply_cannon_metrocomm3       27432 15.5    0.068    0.071    4.112    9.442
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.167    9.342
 multiply_cannon_multrec          27432 15.5    1.966    4.339    5.894    8.723
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.074    7.217
 qs_ks_build_kohn_sham_matrix       119  9.3    0.014    0.021    7.074    7.217
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.236    6.367
 dbcsr_mm_accdrv_process          47894 16.0    3.006    5.090    3.859    5.710
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.534    4.387
 qs_ot_get_p                        119 10.4    0.001    0.002    4.067    4.299
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.037    4.114
 apply_single                       119 13.6    0.000    0.000    3.037    4.114
 sum_up_and_integrate               119 10.3    0.015    0.017    4.099    4.106
 integrate_v_rspace                 119 11.3    0.002    0.005    4.083    4.090
 mp_sum_l                          7207 12.9    2.076    3.963    2.076    3.963
 init_scf_run                        11  5.9    0.000    0.001    3.866    3.867
 scf_env_initial_rho_setup           11  6.9    0.000    0.002    3.866    3.867
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.513    3.544
 calculate_rho_elec                 119  8.7    0.021    0.024    3.513    3.543
 multiply_cannon_sync_h2d         27432 15.5    2.199    2.873    2.199    2.873
 qs_ot_p2m_diag                      50 11.0    0.009    0.013    2.647    2.666
 make_m2s                          4572 13.5    0.052    0.054    2.415    2.630
 rs_pw_transfer                     974 11.9    0.010    0.012    2.513    2.601
 make_images                       4572 14.5    0.200    0.237    2.328    2.542
 init_scf_loop                       11  6.9    0.001    0.004    2.421    2.429
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.396    2.399
 ot_diis_step                       108 11.5    0.011    0.013    2.394    2.395
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.218    2.219
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.082    2.172
 calculate_dm_sparse                119  9.5    0.000    0.000    2.077    2.157
 density_rs2pw                      119  9.7    0.004    0.004    1.882    1.992
 grid_integrate_task_list           119 12.3    1.826    1.937    1.826    1.937
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.907    1.908
 cp_fm_redistribute_end              50 14.0    1.575    1.886    1.578    1.887
 potential_pw2rs                    119 12.3    0.006    0.006    1.862    1.870
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.859    1.861
 cp_fm_diag_elpa_base                50 14.0    0.300    1.830    0.307    1.860
 jit_kernel_multiply                  9 16.1    0.802    1.780    0.802    1.780
 pw_transfer                       1439 11.6    0.063    0.066    1.746    1.778
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.670    1.713
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.658    1.690
 make_images_data                  4572 15.5    0.045    0.052    1.125    1.532
 prepare_preconditioner              11  7.9    0.000    0.000    1.490    1.518
 make_preconditioner                 11  8.9    0.000    0.001    1.490    1.517
 acc_transpose_blocks             27432 15.5    0.107    0.111    1.185    1.487
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.393    1.450
 hybrid_alltoall_any               4725 16.4    0.051    0.112    0.973    1.440
 wfi_extrapolate                     11  7.9    0.001    0.001    1.421    1.422
 fft3d_ps                          1201 14.6    0.500    0.555    1.354    1.390
 fft_wrap_pw1pw2_140                487 13.2    0.087    0.107    1.323    1.355
 mp_alltoall_d11v                  2130 13.8    1.151    1.351    1.151    1.351
 grid_collocate_task_list           119  9.7    1.241    1.323    1.241    1.323
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.295    1.303
 mp_allgather_i34                  2286 14.5    0.512    1.253    0.512    1.253
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.152    1.203
 mp_sum_d                          4129 12.0    0.604    1.059    0.604    1.059
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    1.031    1.035
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.894    0.907
 acc_transpose_blocks_kernels     27432 16.5    0.180    0.269    0.658    0.874
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.703    0.769
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=38.250000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=463.181818, yerr=1.898238
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             520.286208E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62660.
 MP_Allreduce        10225                    303.
 MP_Sync               104
 MP_Alltoall          1821                1607811.
 MP_ISendRecv        22134                  57667.
 MP_Wait             33054
 MP_comm_split          50
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.027   31.664   31.665
 qs_mol_dyn_low                       1  2.0    0.003    0.003   30.992   31.473
 qs_forces                           11  3.9    0.002    0.002   30.920   30.921
 qs_energies                         11  4.9    0.001    0.002   29.361   29.363
 scf_env_do_scf                      11  5.9    0.000    0.001   24.714   24.715
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   22.231   22.231
 dbcsr_multiply_generic            2286 12.5    0.093    0.094   16.082   16.188
 velocity_verlet                     10  3.0    0.002    0.002   14.921   14.922
 qs_scf_new_mos                     108  7.5    0.001    0.001   14.391   14.411
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   14.390   14.411
 multiply_cannon                   2286 13.5    0.193    0.201   12.949   13.745
 ot_scf_mini                        108  9.5    0.002    0.002   13.674   13.690
 multiply_cannon_loop              2286 14.5    0.633    0.667   12.182   13.003
 ot_mini                            108 10.5    0.001    0.001    8.562    8.582
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.076    7.094
 multiply_cannon_multrec          18288 15.5    1.934    2.940    6.726    7.015
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.249    6.269
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.012    6.248    6.268
 dbcsr_mm_accdrv_process          38222 16.0    4.059    5.474    4.707    5.578
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.518    5.537
 mp_waitall_1                    169478 16.3    2.709    3.895    2.709    3.895
 sum_up_and_integrate               119 10.3    0.018    0.019    3.889    3.894
 integrate_v_rspace                 119 11.3    0.002    0.002    3.870    3.877
 init_scf_run                        11  5.9    0.000    0.001    3.380    3.380
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.380    3.380
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.637    3.255
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.171    3.176
 calculate_rho_elec                 119  8.7    0.030    0.031    3.170    3.176
 qs_ot_get_p                        119 10.4    0.001    0.001    2.972    2.988
 init_scf_loop                       11  6.9    0.000    0.000    2.463    2.463
 multiply_cannon_metrocomm3       18288 15.5    0.045    0.047    1.338    2.456
 rs_pw_transfer                     974 11.9    0.009    0.010    2.233    2.372
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.019    2.341
 apply_single                       119 13.6    0.000    0.000    2.019    2.341
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.206    2.207
 jit_kernel_multiply                 11 16.3    0.597    2.028    0.597    2.028
 qs_ot_p2m_diag                      50 11.0    0.012    0.012    1.960    1.966
 grid_integrate_task_list           119 12.3    1.812    1.923    1.812    1.923
 density_rs2pw                      119  9.7    0.003    0.004    1.769    1.901
 make_m2s                          4572 13.5    0.044    0.045    1.710    1.852
 calculate_dm_sparse                119  9.5    0.000    0.000    1.798    1.808
 make_images                       4572 14.5    0.190    0.203    1.625    1.765
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.704    1.704
 prepare_preconditioner              11  7.9    0.000    0.000    1.697    1.699
 make_preconditioner                 11  8.9    0.000    0.000    1.697    1.699
 potential_pw2rs                    119 12.3    0.007    0.008    1.686    1.690
 pw_transfer                       1439 11.6    0.063    0.066    1.668    1.677
 multiply_cannon_sync_h2d         18288 15.5    1.428    1.659    1.428    1.659
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.653    1.654
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.550    1.635
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.578    1.589
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.527    1.536
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.519    1.525
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.486    1.487
 cp_fm_redistribute_end              50 14.0    1.104    1.468    1.105    1.468
 ot_diis_step                       108 11.5    0.011    0.011    1.465    1.466
 cp_fm_diag_elpa_base                50 14.0    0.347    1.409    0.361    1.446
 mp_sum_l                          7207 12.9    1.035    1.401    1.035    1.401
 grid_collocate_task_list           119  9.7    1.204    1.285    1.204    1.285
 fft3d_ps                          1201 14.6    0.506    0.525    1.264    1.274
 fft_wrap_pw1pw2_140                487 13.2    0.088    0.091    1.258    1.268
 acc_transpose_blocks             18288 15.5    0.074    0.076    1.235    1.258
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.173    1.177
 wfi_extrapolate                     11  7.9    0.001    0.001    1.131    1.131
 multiply_cannon_metrocomm1       18288 15.5    0.029    0.030    0.428    1.055
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    0.955    0.956
 make_images_data                  4572 15.5    0.045    0.050    0.768    0.928
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.858    0.879
 hybrid_alltoall_any               4725 16.4    0.055    0.114    0.652    0.843
 mp_alltoall_d11v                  2130 13.8    0.645    0.819    0.645    0.819
 acc_transpose_blocks_kernels     18288 16.5    0.207    0.216    0.800    0.811
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.799    0.801
 cp_fm_cholesky_invert               11 10.9    0.715    0.719    0.715    0.719
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.656    0.718
 rs_pw_transfer_RS2PW_140           130 11.5    0.176    0.180    0.572    0.707
 mp_alltoall_z22v                  1201 16.6    0.608    0.670    0.608    0.670
 mp_waitany                        9880 13.7    0.520    0.634    0.520    0.634
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=31.665000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=494.818182, yerr=2.166614
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             545.370112E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62659.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                2412273.
 MP_ISendRecv        16422                  74133.
 MP_Wait             24482
 MP_comm_split          50
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.029   35.337   35.338
 qs_mol_dyn_low                       1  2.0    0.003    0.003   35.122   35.129
 qs_forces                           11  3.9    0.001    0.002   35.061   35.062
 qs_energies                         11  4.9    0.001    0.001   33.312   33.317
 scf_env_do_scf                      11  5.9    0.000    0.001   28.051   28.052
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   24.578   24.578
 dbcsr_multiply_generic            2286 12.5    0.099    0.102   18.525   18.635
 velocity_verlet                     10  3.0    0.005    0.008   18.016   18.018
 qs_scf_new_mos                     108  7.5    0.001    0.001   16.284   16.338
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   16.283   16.337
 ot_scf_mini                        108  9.5    0.002    0.003   15.345   15.400
 multiply_cannon                   2286 13.5    0.231    0.265   14.814   15.203
 multiply_cannon_loop              2286 14.5    0.934    0.964   13.848   14.175
 ot_mini                            108 10.5    0.001    0.001    9.495    9.563
 multiply_cannon_multrec          27432 15.5    2.381    3.053    8.895    9.230
 dbcsr_mm_accdrv_process          47916 15.9    5.383    7.554    6.420    7.740
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.675    7.733
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.553    6.611
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.012    6.553    6.610
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.808    5.860
 init_scf_run                        11  5.9    0.000    0.001    3.883    3.884
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.883    3.884
 sum_up_and_integrate               119 10.3    0.021    0.024    3.805    3.813
 integrate_v_rspace                 119 11.3    0.002    0.002    3.783    3.792
 init_scf_loop                       11  6.9    0.000    0.000    3.448    3.448
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.404    3.439
 calculate_rho_elec                 119  8.7    0.040    0.046    3.403    3.439
 qs_ot_get_p                        119 10.4    0.001    0.001    3.221    3.300
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.848    3.235
 mp_waitall_1                    145218 16.4    2.289    2.778    2.289    2.778
 prepare_preconditioner              11  7.9    0.000    0.000    2.609    2.617
 make_preconditioner                 11  8.9    0.000    0.000    2.609    2.617
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.210    2.540
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.480    2.482
 make_m2s                          4572 13.5    0.054    0.056    2.273    2.366
 make_images                       4572 14.5    0.273    0.330    2.167    2.258
 rs_pw_transfer                     974 11.9    0.009    0.009    2.145    2.214
 calculate_dm_sparse                119  9.5    0.000    0.000    2.142    2.192
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.040    2.163
 apply_single                       119 13.6    0.000    0.000    2.040    2.163
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.949    1.979
 density_rs2pw                      119  9.7    0.003    0.004    1.895    1.952
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    1.912    1.924
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.920    1.921
 grid_integrate_task_list           119 12.3    1.819    1.906    1.819    1.906
 jit_kernel_multiply                  9 16.0    0.978    1.871    0.978    1.871
 pw_transfer                       1439 11.6    0.063    0.066    1.824    1.857
 ot_diis_step                       108 11.5    0.012    0.012    1.779    1.779
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.735    1.772
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.729    1.742
 potential_pw2rs                    119 12.3    0.009    0.009    1.599    1.606
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.593    1.594
 acc_transpose_blocks             27432 15.5    0.111    0.114    1.482    1.513
 fft3d_ps                          1201 14.6    0.534    0.586    1.410    1.438
 fft_wrap_pw1pw2_140                487 13.2    0.090    0.106    1.394    1.431
 multiply_cannon_metrocomm3       27432 15.5    0.038    0.039    0.827    1.414
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.347    1.348
 wfi_extrapolate                     11  7.9    0.001    0.001    1.339    1.339
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.328    1.337
 cp_fm_redistribute_end              50 14.0    0.889    1.324    0.890    1.324
 mp_sum_l                          7207 12.9    1.010    1.309    1.010    1.309
 cp_fm_diag_elpa_base                50 14.0    0.412    1.259    0.432    1.298
 grid_collocate_task_list           119  9.7    1.220    1.292    1.220    1.292
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.118    1.134
 cp_fm_upper_to_full                 72 13.5    0.808    1.121    0.808    1.121
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.116    1.116
 dbcsr_complete_redistribute        329 12.2    0.130    0.152    0.825    1.099
 multiply_cannon_sync_h2d         27432 15.5    0.995    1.060    0.995    1.060
 make_images_data                  4572 15.5    0.045    0.049    0.900    1.009
 hybrid_alltoall_any               4725 16.4    0.062    0.152    0.782    0.944
 mp_alltoall_d11v                  2130 13.8    0.785    0.922    0.785    0.922
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.802    0.881
 acc_transpose_blocks_kernels     27432 16.5    0.266    0.276    0.861    0.881
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.593    0.862
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.836    0.840
 mp_alltoall_z22v                  1201 16.6    0.758    0.786    0.758    0.786
 cp_fm_cholesky_invert               11 10.9    0.779    0.782    0.779    0.782
 mp_alltoall_i22                    627 13.8    0.438    0.741    0.438    0.741
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=35.338000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=517.818182, yerr=3.638636
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             602.222592E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                3682667.
 MP_ISendRecv        10710                  94533.
 MP_Wait             16690
 MP_comm_split          50
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.027   28.371   28.371
 qs_mol_dyn_low                       1  2.0    0.003    0.003   28.208   28.215
 qs_forces                           11  3.9    0.002    0.002   28.097   28.099
 qs_energies                         11  4.9    0.001    0.001   26.361   26.364
 scf_env_do_scf                      11  5.9    0.000    0.001   21.514   21.514
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   18.973   18.973
 velocity_verlet                     10  3.0    0.021    0.025   14.548   14.553
 dbcsr_multiply_generic            2286 12.5    0.092    0.095   12.485   12.615
 qs_scf_new_mos                     108  7.5    0.001    0.001   11.263   11.290
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   11.262   11.289
 ot_scf_mini                        108  9.5    0.002    0.002   10.572   10.599
 multiply_cannon                   2286 13.5    0.239    0.249    9.808   10.067
 multiply_cannon_loop              2286 14.5    0.331    0.340    8.877    9.130
 multiply_cannon_multrec           9144 15.5    1.596    1.816    5.773    6.002
 ot_mini                            108 10.5    0.001    0.001    5.964    5.996
 rebuild_ks_matrix                  119  8.3    0.000    0.000    5.901    5.922
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.012    5.901    5.921
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.245    5.264
 qs_ot_get_derivative               108 11.5    0.001    0.001    4.655    4.683
 dbcsr_mm_accdrv_process          12550 15.8    3.143    3.929    4.075    4.153
 sum_up_and_integrate               119 10.3    0.023    0.024    3.610    3.614
 integrate_v_rspace                 119 11.3    0.002    0.003    3.587    3.591
 init_scf_run                        11  5.9    0.000    0.001    3.405    3.405
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.405    3.405
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.308    3.317
 calculate_rho_elec                 119  8.7    0.059    0.061    3.307    3.316
 qs_ot_get_p                        119 10.4    0.001    0.001    2.774    2.806
 mp_waitall_1                    121218 16.5    2.035    2.690    2.035    2.690
 init_scf_loop                       11  6.9    0.000    0.000    2.521    2.522
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.254    2.254
 make_m2s                          4572 13.5    0.034    0.035    1.829    1.974
 grid_integrate_task_list           119 12.3    1.856    1.946    1.856    1.946
 make_images                       4572 14.5    0.267    0.303    1.740    1.884
 rs_pw_transfer                     974 11.9    0.008    0.008    1.787    1.865
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    1.791    1.793
 density_rs2pw                      119  9.7    0.003    0.004    1.701    1.791
 prepare_preconditioner              11  7.9    0.000    0.000    1.781    1.787
 make_preconditioner                 11  8.9    0.000    0.000    1.781    1.787
 calculate_dm_sparse                119  9.5    0.000    0.000    1.749    1.766
 pw_transfer                       1439 11.6    0.063    0.065    1.732    1.739
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.661    1.685
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.643    1.650
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.607    1.608
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.597    1.598
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.521    1.540
 jit_kernel_multiply                  8 15.6    0.895    1.466    0.895    1.466
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.390    1.396
 potential_pw2rs                    119 12.3    0.010    0.011    1.389    1.392
 grid_collocate_task_list           119  9.7    1.274    1.360    1.274    1.360
 fft_wrap_pw1pw2_140                487 13.2    0.085    0.089    1.345    1.355
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.321    1.321
 fft3d_ps                          1201 14.6    0.539    0.553    1.306    1.312
 cp_fm_redistribute_end              50 14.0    0.656    1.298    0.656    1.298
 ot_diis_step                       108 11.5    0.012    0.013    1.296    1.296
 cp_fm_diag_elpa_base                50 14.0    0.598    1.229    0.640    1.284
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.227    1.228
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.183    1.201
 apply_single                       119 13.6    0.000    0.000    1.183    1.201
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.175    1.188
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.149    1.155
 wfi_extrapolate                     11  7.9    0.001    0.001    1.107    1.107
 hybrid_alltoall_any               4725 16.4    0.063    0.177    0.853    1.081
 make_images_data                  4572 15.5    0.039    0.043    0.897    1.075
 mp_alltoall_d11v                  2130 13.8    0.832    0.941    0.832    0.941
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.878    0.936
 acc_transpose_blocks              9144 15.5    0.039    0.040    0.899    0.910
 cp_fm_cholesky_invert               11 10.9    0.886    0.888    0.886    0.888
 multiply_cannon_metrocomm3        9144 15.5    0.019    0.019    0.474    0.810
 multiply_cannon_sync_h2d          9144 15.5    0.715    0.792    0.715    0.792
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.777    0.779
 multiply_cannon_metrocomm1        9144 15.5    0.022    0.023    0.418    0.770
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.736    0.744
 mp_allgather_i34                  2286 14.5    0.255    0.733    0.255    0.733
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    0.673    0.725
 mp_alltoall_z22v                  1201 16.6    0.641    0.676    0.641    0.676
 acc_transpose_blocks_kernels      9144 16.5    0.116    0.119    0.659    0.666
 mp_sum_l                          7207 12.9    0.416    0.643    0.416    0.643
 dbcsr_complete_redistribute        329 12.2    0.203    0.222    0.599    0.630
 qs_create_task_list                 11  7.9    0.000    0.000    0.543    0.568
 generate_qs_task_list               11  8.9    0.189    0.212    0.542    0.567
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=28.371000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=569.181818, yerr=6.779892
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             742.199296E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63729.
 MP_Allreduce        10074                    433.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_ISendRecv         4998                 189067.
 MP_Wait              8898
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.034   40.297   40.297
 qs_mol_dyn_low                       1  2.0    0.003    0.004   40.071   40.078
 qs_forces                           11  3.9    0.002    0.002   40.009   40.010
 qs_energies                         11  4.9    0.001    0.001   38.048   38.051
 scf_env_do_scf                      11  5.9    0.001    0.001   32.526   32.526
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.006   24.648   24.649
 velocity_verlet                     10  3.0    0.002    0.002   22.890   22.896
 dbcsr_multiply_generic            2286 12.5    0.101    0.102   17.272   17.520
 qs_scf_new_mos                     108  7.5    0.001    0.001   15.808   15.907
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   15.807   15.906
 ot_scf_mini                        108  9.5    0.002    0.002   14.735   14.836
 multiply_cannon                   2286 13.5    0.298    0.303   13.354   14.375
 multiply_cannon_loop              2286 14.5    0.343    0.350   12.107   13.109
 ot_mini                            108 10.5    0.001    0.001    8.732    8.851
 multiply_cannon_multrec           9144 15.5    3.370    4.670    8.474    8.580
 init_scf_loop                       11  6.9    0.000    0.000    7.840    7.841
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.846    6.993
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    6.846    6.993
 prepare_preconditioner              11  7.9    0.000    0.000    6.904    6.918
 make_preconditioner                 11  8.9    0.000    0.000    6.904    6.918
 qs_ot_get_derivative               108 11.5    0.001    0.001    6.693    6.793
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.443    6.791
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.195    6.328
 dbcsr_mm_accdrv_process          12550 15.8    4.130    5.566    4.979    6.319
 cp_fm_upper_to_full                 72 14.2    3.174    4.574    3.174    4.574
 sum_up_and_integrate               119 10.3    0.038    0.038    3.658    3.665
 integrate_v_rspace                 119 11.3    0.003    0.003    3.620    3.627
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.605    3.610
 calculate_rho_elec                 119  8.7    0.118    0.121    3.605    3.609
 init_scf_run                        11  5.9    0.000    0.001    3.438    3.439
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.438    3.439
 mp_waitall_1                     97218 16.6    2.481    3.436    2.481    3.436
 qs_ot_get_p                        119 10.4    0.001    0.001    3.263    3.405
 dbcsr_complete_redistribute        329 12.2    0.295    0.300    2.021    2.865
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.414    2.861
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.701    2.543
 make_m2s                          4572 13.5    0.037    0.038    2.305    2.470
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.221    2.454
 apply_single                       119 13.6    0.000    0.000    2.221    2.454
 make_images                       4572 14.5    0.351    0.383    2.185    2.351
 mp_alltoall_i22                    627 13.8    1.437    2.323    1.437    2.323
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.456    2.291
 calculate_dm_sparse                119  9.5    0.000    0.000    2.132    2.149
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.020    1.201    2.128
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.051    2.052
 pw_transfer                       1439 11.6    0.067    0.068    2.037    2.042
 grid_integrate_task_list           119 12.3    2.007    2.012    2.007    2.012
 ot_diis_step                       108 11.5    0.014    0.014    2.008    2.008
 qs_ot_p2m_diag                      50 11.0    0.043    0.044    1.987    1.988
 fft_wrap_pw1pw2                   1201 12.6    0.009    0.009    1.942    1.946
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.819    1.819
 mp_sum_l                          7207 12.9    1.071    1.792    1.071    1.792
 density_rs2pw                      119  9.7    0.003    0.003    1.773    1.786
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.772    1.773
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.665    1.716
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.713    1.713
 fft_wrap_pw1pw2_140                487 13.2    0.101    0.122    1.627    1.631
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.537    1.585
 fft3d_ps                          1201 14.6    0.566    0.576    1.561    1.577
 cp_fm_cholesky_invert               11 10.9    1.506    1.509    1.506    1.509
 rs_pw_transfer                     974 11.9    0.009    0.009    1.461    1.494
 grid_collocate_task_list           119  9.7    1.446    1.466    1.446    1.466
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.440    1.442
 cp_fm_diag_elpa_base                50 14.0    1.292    1.347    1.438    1.440
 hybrid_alltoall_any               4725 16.4    0.088    0.149    1.120    1.375
 make_images_data                  4572 15.5    0.043    0.046    1.125    1.335
 wfi_extrapolate                     11  7.9    0.001    0.001    1.334    1.334
 potential_pw2rs                    119 12.3    0.014    0.014    1.235    1.237
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.211    1.218
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.146    1.165
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.142    1.164
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.993    1.054
 multiply_cannon_sync_h2d          9144 15.5    1.041    1.045    1.041    1.045
 mp_alltoall_d11v                  2130 13.8    1.008    1.021    1.008    1.021
 jit_kernel_multiply                  6 15.6    0.822    1.019    0.822    1.019
 qs_create_task_list                 11  7.9    0.000    0.000    0.942    0.955
 generate_qs_task_list               11  8.9    0.372    0.392    0.942    0.955
 acc_transpose_blocks              9144 15.5    0.038    0.039    0.899    0.901
 mp_alltoall_z22v                  1201 16.6    0.859    0.896    0.859    0.896
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.861    0.875
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=40.297000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=701.090909, yerr=10.273130
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             500.084736E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65372.
 MP_Allreduce         9840                    486.
 MP_Sync               100
 MP_Alltoall          1938                1926918.
 MP_ISendRecv        41800                   9096.
 MP_Wait             58168
 MP_comm_split          48
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.028   82.133   82.135
 qs_mol_dyn_low                       1  2.0    0.004    0.014   81.839   81.849
 qs_forces                           11  3.9    0.002    0.002   81.732   81.740
 qs_energies                         11  4.9    0.001    0.002   78.882   78.900
 scf_env_do_scf                      11  5.9    0.000    0.001   70.034   70.037
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   64.479   64.479
 dbcsr_multiply_generic            2055 12.4    0.105    0.107   51.628   51.891
 qs_scf_new_mos                      99  7.5    0.000    0.001   47.457   47.581
 qs_scf_loop_do_ot                   99  8.5    0.000    0.001   47.457   47.581
 ot_scf_mini                         99  9.5    0.002    0.003   45.014   45.125
 multiply_cannon                   2055 13.4    0.175    0.184   42.198   43.218
 multiply_cannon_loop              2055 14.4    1.549    1.593   41.239   42.310
 velocity_verlet                     10  3.0    0.002    0.006   41.671   41.672
 ot_mini                             99 10.5    0.001    0.006   27.284   27.392
 qs_ot_get_derivative                99 11.5    0.001    0.001   20.453   20.548
 multiply_cannon_multrec          49320 15.4   12.127   12.892   17.139   17.801
 rebuild_ks_matrix                  110  8.3    0.000    0.000   14.548   14.633
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.014   14.548   14.633
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.771   12.850
 mp_waitall_1                    241148 16.1   11.379   12.778   11.379   12.778
 multiply_cannon_sync_h2d         49320 15.4    9.970   10.636    9.970   10.636
 qs_ot_get_p                        110 10.4    0.001    0.001    9.300    9.428
 multiply_cannon_metrocomm3       49320 15.4    0.075    0.080    6.760    8.164
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.142    7.592
 apply_single                       110 13.6    0.000    0.001    7.142    7.591
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    6.830    7.394
 sum_up_and_integrate               110 10.3    0.023    0.029    7.134    7.147
 integrate_v_rspace                 110 11.3    0.002    0.003    7.111    7.128
 init_scf_run                        11  5.9    0.000    0.001    6.824    6.824
 scf_env_initial_rho_setup           11  6.9    0.000    0.004    6.824    6.824
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    6.640    6.728
 ot_diis_step                        99 11.5    0.011    0.051    6.547    6.548
 qs_rho_update_rho_low              110  7.6    0.000    0.001    6.204    6.312
 calculate_rho_elec                 110  8.6    0.020    0.024    6.203    6.312
 qs_ot_p2m_diag                      48 11.0    0.012    0.019    6.036    6.063
 mp_sum_l                          6514 12.8    5.088    6.002    5.088    6.002
 init_scf_loop                       11  6.9    0.000    0.000    5.528    5.528
 dbcsr_mm_accdrv_process          87628 16.1    2.075    2.206    4.892    5.236
 cp_dbcsr_syevd                      48 12.0    0.002    0.002    5.032    5.033
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    4.560    4.561
 cp_fm_redistribute_end              48 14.0    3.973    4.535    3.977    4.536
 cp_fm_diag_elpa_base                48 14.0    0.553    4.441    0.557    4.465
 rs_pw_transfer                     902 11.9    0.012    0.014    4.077    4.299
 make_m2s                          4110 13.4    0.060    0.064    3.887    4.002
 wfi_extrapolate                     11  7.9    0.001    0.003    3.995    3.995
 make_images                       4110 14.4    0.178    0.192    3.792    3.911
 calculate_dm_sparse                110  9.5    0.000    0.001    3.810    3.892
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.626    3.630
 multiply_cannon_metrocomm1       49320 15.4    0.060    0.065    2.556    3.630
 density_rs2pw                      110  9.6    0.004    0.004    3.317    3.477
 grid_integrate_task_list           110 12.3    3.255    3.421    3.255    3.421
 prepare_preconditioner              11  7.9    0.000    0.000    3.369    3.387
 make_preconditioner                 11  8.9    0.000    0.000    3.369    3.387
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.266    3.331
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    3.190    3.251
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.148    3.194
 pw_transfer                       1331 11.6    0.054    0.061    2.761    2.823
 potential_pw2rs                    110 12.3    0.006    0.006    2.755    2.774
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.745    2.748
 fft_wrap_pw1pw2                   1111 12.6    0.007    0.008    2.670    2.734
 jit_kernel_multiply                 13 15.9    2.534    2.547    2.534    2.547
 mp_alltoall_d11v                  2046 13.8    2.063    2.480    2.063    2.480
 fft_wrap_pw1pw2_140                451 13.1    0.170    0.189    2.254    2.326
 acc_transpose_blocks             49320 15.4    0.214    0.226    2.196    2.286
 fft3d_ps                          1111 14.6    0.754    0.845    2.207    2.259
 grid_collocate_task_list           110  9.6    2.089    2.189    2.089    2.189
 mp_sum_d                          3883 11.9    1.489    2.019    1.489    2.019
 mp_waitany                       14300 13.8    1.740    1.943    1.740    1.943
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.917    1.939
 make_images_data                  4110 15.4    0.043    0.046    1.786    1.927
 hybrid_alltoall_any               4261 16.3    0.082    0.482    1.560    1.874
 cp_fm_cholesky_invert               11 10.9    1.833    1.837    1.833    1.837
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.640    1.683
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=82.135000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=474.636364, yerr=2.267266
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             583.249920E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65587.
 MP_Allreduce         9839                    562.
 MP_Sync               100
 MP_Alltoall          1717                1414451.
 MP_ISendRecv        20680                  26400.
 MP_Wait             32692
 MP_comm_split          48
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.027    0.040   69.069   69.070
 qs_mol_dyn_low                       1  2.0    0.004    0.005   68.631   68.641
 qs_forces                           11  3.9    0.005    0.014   68.561   68.561
 qs_energies                         11  4.9    0.002    0.004   65.204   65.208
 scf_env_do_scf                      11  5.9    0.001    0.001   56.571   56.574
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   48.862   48.862
 dbcsr_multiply_generic            2055 12.4    0.113    0.116   37.653   37.787
 velocity_verlet                     10  3.0    0.002    0.002   36.311   36.312
 qs_scf_new_mos                      99  7.5    0.001    0.001   32.764   32.891
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   32.763   32.891
 multiply_cannon                   2055 13.4    0.224    0.253   30.920   32.068
 ot_scf_mini                         99  9.5    0.003    0.004   31.105   31.212
 multiply_cannon_loop              2055 14.4    0.929    0.952   29.621   30.630
 ot_mini                             99 10.5    0.001    0.002   18.224   18.346
 multiply_cannon_multrec          24660 15.4    7.633    9.373   13.815   15.579
 rebuild_ks_matrix                  110  8.3    0.000    0.000   13.755   13.855
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.018   13.755   13.854
 qs_ot_get_derivative                99 11.5    0.001    0.002   12.427   12.537
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.129   12.207
 mp_waitall_1                    186928 16.3    7.832   10.154    7.832   10.154
 multiply_cannon_sync_h2d         24660 15.4    7.023    7.897    7.023    7.897
 multiply_cannon_metrocomm3       24660 15.4    0.071    0.073    5.111    7.840
 init_scf_loop                       11  6.9    0.000    0.000    7.668    7.669
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.495    7.166
 apply_single                       110 13.6    0.000    0.001    6.495    7.166
 sum_up_and_integrate               110 10.3    0.030    0.035    6.641    6.651
 integrate_v_rspace                 110 11.3    0.002    0.003    6.611    6.624
 qs_ot_get_p                        110 10.4    0.001    0.001    6.224    6.357
 dbcsr_mm_accdrv_process          52282 16.1    4.681    5.418    6.021    6.272
 init_scf_run                        11  5.9    0.000    0.001    6.117    6.118
 scf_env_initial_rho_setup           11  6.9    0.000    0.002    6.117    6.118
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.924    5.936
 calculate_rho_elec                 110  8.6    0.039    0.047    5.924    5.936
 ot_diis_step                        99 11.5    0.011    0.013    5.745    5.746
 prepare_preconditioner              11  7.9    0.000    0.000    5.633    5.651
 make_preconditioner                 11  8.9    0.000    0.001    5.633    5.651
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    4.670    5.375
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.193    5.351
 make_m2s                          4110 13.4    0.056    0.060    4.287    4.746
 make_images                       4110 14.4    0.399    0.446    4.179    4.634
 qs_ot_p2m_diag                      48 11.0    0.029    0.044    4.276    4.296
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.848    3.849
 wfi_extrapolate                     11  7.9    0.003    0.008    3.538    3.538
 pw_transfer                       1331 11.6    0.067    0.070    3.334    3.474
 rs_pw_transfer                     902 11.9    0.012    0.014    3.260    3.468
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.223    3.360
 density_rs2pw                      110  9.6    0.004    0.004    3.204    3.357
 grid_integrate_task_list           110 12.3    3.151    3.295    3.151    3.295
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.285    3.287
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.220    3.284
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.274    3.277
 cp_fm_redistribute_end              48 14.0    2.445    3.252    2.447    3.252
 cp_fm_diag_elpa_base                48 14.0    0.770    3.124    0.802    3.214
 calculate_dm_sparse                110  9.5    0.001    0.001    2.958    2.983
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.924    2.963
 make_images_data                  4110 15.4    0.048    0.052    2.384    2.864
 hybrid_alltoall_any               4261 16.3    0.102    0.446    2.088    2.849
 fft_wrap_pw1pw2_140                451 13.1    0.202    0.219    2.684    2.820
 fft3d_ps                          1111 14.6    1.071    1.266    2.578    2.706
 cp_fm_cholesky_invert               11 10.9    2.611    2.619    2.611    2.619
 potential_pw2rs                    110 12.3    0.008    0.009    2.525    2.540
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.491    2.495
 mp_sum_l                          6514 12.8    1.839    2.472    1.839    2.472
 grid_collocate_task_list           110  9.6    2.123    2.271    2.123    2.271
 mp_alltoall_d11v                  2046 13.8    1.827    2.000    1.827    2.000
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.964    1.981
 qs_energies_init_hamiltonians       11  5.9    0.002    0.005    1.948    1.949
 jit_kernel_multiply                 10 16.4    0.988    1.930    0.988    1.930
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.792    1.809
 mp_allgather_i34                  2055 14.4    0.625    1.672    0.625    1.672
 multiply_cannon_metrocomm4       22605 15.4    0.073    0.078    0.789    1.575
 acc_transpose_blocks             24660 15.4    0.106    0.109    1.528    1.572
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.535    1.547
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.386    1.499
 mp_irecv_dv                      57340 16.2    0.666    1.457    0.666    1.457
 dbcsr_complete_redistribute        325 12.2    0.244    0.305    1.179    1.449
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=69.070000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=551.909091, yerr=5.899993
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             657.674240E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65578.
 MP_Allreduce         9838                    559.
 MP_Sync               100
 MP_Alltoall          1496                4511006.
 MP_ISendRecv        13640                  27424.
 MP_Wait             32318
 MP_comm_split          48
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.029    0.055   60.252   60.254
 qs_mol_dyn_low                       1  2.0    0.003    0.004   59.875   59.884
 qs_forces                           11  3.9    0.002    0.002   59.731   59.732
 qs_energies                         11  4.9    0.002    0.006   56.523   56.533
 scf_env_do_scf                      11  5.9    0.001    0.001   48.567   48.567
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   39.858   39.859
 velocity_verlet                     10  3.0    0.001    0.002   32.757   32.763
 dbcsr_multiply_generic            2055 12.4    0.108    0.114   28.551   28.835
 qs_scf_new_mos                      99  7.5    0.001    0.001   25.208   25.292
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   25.208   25.292
 ot_scf_mini                         99  9.5    0.002    0.003   23.980   24.086
 multiply_cannon                   2055 13.4    0.211    0.220   22.110   23.241
 multiply_cannon_loop              2055 14.4    0.618    0.634   20.884   22.167
 ot_mini                             99 10.5    0.001    0.002   13.646   13.753
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.284   12.423
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.016   12.283   12.423
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.838   10.964
 multiply_cannon_multrec          16440 15.4    3.976    5.010    9.719   10.697
 mp_waitall_1                    146766 16.3    7.205   10.208    7.205   10.208
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.156    9.259
 init_scf_loop                       11  6.9    0.000    0.000    8.670    8.671
 multiply_cannon_metrocomm3       16440 15.4    0.043    0.044    4.256    7.169
 prepare_preconditioner              11  7.9    0.000    0.000    6.894    6.909
 make_preconditioner                 11  8.9    0.000    0.001    6.894    6.909
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.253    6.597
 sum_up_and_integrate               110 10.3    0.034    0.034    6.484    6.500
 integrate_v_rspace                 110 11.3    0.002    0.003    6.450    6.466
 dbcsr_mm_accdrv_process          34862 16.1    4.835    5.391    5.598    5.776
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.598    5.608
 calculate_rho_elec                 110  8.6    0.058    0.058    5.597    5.608
 qs_ot_get_p                        110 10.4    0.001    0.001    5.482    5.600
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.999    5.463
 init_scf_run                        11  5.9    0.000    0.001    5.462    5.463
 apply_single                       110 13.6    0.000    0.000    4.999    5.463
 scf_env_initial_rho_setup           11  6.9    0.000    0.002    5.462    5.462
 make_m2s                          4110 13.4    0.049    0.051    4.274    4.566
 ot_diis_step                        99 11.5    0.011    0.013    4.456    4.456
 make_images                       4110 14.4    0.393    0.508    4.159    4.450
 multiply_cannon_sync_h2d         16440 15.4    3.716    4.273    3.716    4.273
 qs_ot_p2m_diag                      48 11.0    0.042    0.044    3.811    3.816
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.085    3.715
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.468    3.469
 grid_integrate_task_list           110 12.3    3.190    3.395    3.190    3.395
 pw_transfer                       1331 11.6    0.066    0.073    3.146    3.153
 rs_pw_transfer                     902 11.9    0.010    0.012    2.953    3.149
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.034    3.044
 density_rs2pw                      110  9.6    0.004    0.004    2.889    3.040
 wfi_extrapolate                     11  7.9    0.001    0.001    2.969    2.969
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.923    2.925
 cp_fm_redistribute_end              48 14.0    1.815    2.890    1.817    2.891
 make_images_data                  4110 15.4    0.044    0.048    2.494    2.875
 cp_fm_diag_elpa_base                48 14.0    1.011    2.751    1.069    2.856
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.812    2.814
 hybrid_alltoall_any               4261 16.3    0.106    0.374    2.280    2.778
 cp_fm_cholesky_invert               11 10.9    2.762    2.769    2.762    2.769
 fft_wrap_pw1pw2_140                451 13.1    0.214    0.217    2.576    2.587
 calculate_dm_sparse                110  9.5    0.001    0.001    2.499    2.539
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.477    2.523
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.393    2.453
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.410    2.411
 potential_pw2rs                    110 12.3    0.011    0.011    2.351    2.363
 fft3d_ps                          1111 14.6    1.062    1.073    2.345    2.354
 grid_collocate_task_list           110  9.6    2.177    2.353    2.177    2.353
 multiply_cannon_metrocomm4       14385 15.4    0.045    0.048    0.860    2.336
 mp_irecv_dv                      48980 15.7    0.791    2.214    0.791    2.214
 mp_alltoall_d11v                  2046 13.8    1.779    2.168    1.779    2.168
 mp_sum_l                          6514 12.8    1.552    2.137    1.552    2.137
 qs_energies_init_hamiltonians       11  5.9    0.001    0.005    2.036    2.036
 dbcsr_complete_redistribute        325 12.2    0.324    0.368    1.416    1.879
 cp_fm_upper_to_full                 70 13.6    1.384    1.853    1.384    1.853
 mp_allgather_i34                  2055 14.4    0.549    1.706    0.549    1.706
 cp_fm_cholesky_decompose            22 10.9    1.658    1.679    1.658    1.679
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.646    1.660
 jit_kernel_multiply                  8 16.7    0.371    1.516    0.371    1.516
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.376    1.494
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.455    1.467
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    0.963    1.417
 mp_waitany                       17072 13.8    1.147    1.336    1.147    1.336
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.280    1.288
 acc_transpose_blocks             16440 15.4    0.072    0.074    1.224    1.234
 rs_gather_matrices                 110 12.3    0.137    0.150    0.857    1.214
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=60.254000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=623.181818, yerr=8.451123
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             734.380032E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65576.
 MP_Allreduce         9838                    600.
 MP_Sync               100
 MP_Alltoall          1496                5863162.
 MP_ISendRecv        10120                  43184.
 MP_Wait             25102
 MP_comm_split          48
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.021    0.042   65.013   65.015
 qs_mol_dyn_low                       1  2.0    0.003    0.003   64.730   64.739
 qs_forces                           11  3.9    0.002    0.004   64.507   64.508
 qs_energies                         11  4.9    0.003    0.018   61.050   61.053
 scf_env_do_scf                      11  5.9    0.001    0.001   52.709   52.712
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   40.812   40.813
 velocity_verlet                     10  3.0    0.002    0.002   37.151   37.160
 dbcsr_multiply_generic            2055 12.4    0.115    0.117   29.433   29.624
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.256   26.368
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.255   26.367
 ot_scf_mini                         99  9.5    0.003    0.003   24.631   24.722
 multiply_cannon                   2055 13.4    0.242    0.260   22.472   23.499
 multiply_cannon_loop              2055 14.4    0.881    0.901   20.941   21.546
 ot_mini                             99 10.5    0.001    0.002   14.058   14.170
 multiply_cannon_multrec          24660 15.4    4.206    6.930   12.624   13.676
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.051   12.148
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.017   12.051   12.147
 init_scf_loop                       11  6.9    0.000    0.000   11.856   11.856
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.659   10.743
 prepare_preconditioner              11  7.9    0.000    0.000   10.121   10.137
 make_preconditioner                 11  8.9    0.000    0.001   10.121   10.137
 qs_ot_get_derivative                99 11.5    0.001    0.002    9.896    9.989
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.344    9.804
 dbcsr_mm_accdrv_process          52304 16.0    7.000    8.478    8.270    9.219
 mp_waitall_1                    126806 16.4    4.390    6.495    4.390    6.495
 sum_up_and_integrate               110 10.3    0.039    0.041    6.399    6.414
 integrate_v_rspace                 110 11.3    0.003    0.003    6.360    6.374
 make_m2s                          4110 13.4    0.059    0.060    5.349    5.769
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.687    5.695
 calculate_rho_elec                 110  8.6    0.077    0.081    5.686    5.695
 qs_ot_get_p                        110 10.4    0.001    0.001    5.537    5.661
 make_images                       4110 14.4    0.578    0.701    5.210    5.626
 init_scf_run                        11  5.9    0.000    0.001    5.518    5.518
 scf_env_initial_rho_setup           11  6.9    0.000    0.003    5.517    5.518
 cp_fm_upper_to_full                 70 13.8    3.300    4.692    3.300    4.692
 ot_diis_step                        99 11.5    0.012    0.016    4.123    4.124
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.022    4.088
 apply_single                       110 13.6    0.000    0.000    4.022    4.087
 dbcsr_complete_redistribute        325 12.2    0.428    0.472    2.693    3.858
 qs_ot_p2m_diag                      48 11.0    0.054    0.063    3.747    3.760
 grid_integrate_task_list           110 12.3    3.280    3.425    3.280    3.425
 multiply_cannon_metrocomm3       24660 15.4    0.035    0.036    1.426    3.401
 multiply_cannon_sync_h2d         24660 15.4    3.181    3.350    3.181    3.350
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.288    3.330
 pw_transfer                       1331 11.6    0.066    0.073    3.283    3.310
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    2.129    3.279
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.272    3.273
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.173    3.202
 hybrid_alltoall_any               4261 16.3    0.120    0.456    2.324    3.111
 make_images_data                  4110 15.4    0.046    0.051    2.676    3.107
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.035    3.086
 density_rs2pw                      110  9.6    0.004    0.004    2.845    3.014
 wfi_extrapolate                     11  7.9    0.001    0.001    2.969    2.970
 calculate_dm_sparse                110  9.5    0.001    0.001    2.907    2.938
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.768    2.892
 mp_alltoall_i22                    605 13.7    1.653    2.859    1.653    2.859
 cp_fm_cholesky_invert               11 10.9    2.822    2.830    2.822    2.830
 rs_pw_transfer                     902 11.9    0.010    0.011    2.587    2.816
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.796    2.797
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.725    2.727
 fft_wrap_pw1pw2_140                451 13.1    0.206    0.215    2.682    2.711
 cp_fm_redistribute_end              48 14.0    1.350    2.679    1.351    2.680
 cp_fm_diag_elpa_base                48 14.0    1.248    2.547    1.326    2.657
 fft3d_ps                          1111 14.6    1.061    1.093    2.471    2.492
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.436    2.476
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.459    2.462
 grid_collocate_task_list           110  9.6    2.222    2.331    2.222    2.331
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    2.275    2.276
 potential_pw2rs                    110 12.3    0.013    0.013    2.195    2.206
 jit_kernel_multiply                 11 16.0    0.937    2.015    0.937    2.015
 mp_alltoall_d11v                  2046 13.8    1.812    1.966    1.812    1.966
 cp_fm_cholesky_decompose            22 10.9    1.729    1.778    1.729    1.778
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.717    1.751
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.625    1.724
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.637    1.648
 mp_allgather_i34                  2055 14.4    0.630    1.600    0.630    1.600
 mp_sum_l                          6514 12.8    0.930    1.572    0.930    1.572
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.523    1.538
 acc_transpose_blocks             24660 15.4    0.104    0.107    1.498    1.518
 multiply_cannon_metrocomm4       20550 15.4    0.057    0.061    0.842    1.457
 mp_irecv_dv                      62702 16.1    0.744    1.376    0.744    1.376
 mp_waitany                       13376 13.8    1.044    1.342    1.044    1.342
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.209    1.305
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=65.015000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=694.272727, yerr=10.198849
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             816.877568E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65574.
 MP_Allreduce         9838                    640.
 MP_Sync               100
 MP_Alltoall          1496                8504061.
 MP_ISendRecv         6600                  54848.
 MP_Wait             17226
 MP_comm_split          48
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.015    0.030   54.596   54.597
 qs_mol_dyn_low                       1  2.0    0.003    0.003   54.357   54.365
 qs_forces                           11  3.9    0.002    0.002   54.291   54.292
 qs_energies                         11  4.9    0.002    0.002   50.676   50.680
 scf_env_do_scf                      11  5.9    0.000    0.001   42.226   42.226
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   34.510   34.510
 velocity_verlet                     10  3.0    0.002    0.002   30.455   30.457
 dbcsr_multiply_generic            2055 12.4    0.104    0.105   22.652   22.773
 qs_scf_new_mos                      99  7.5    0.001    0.001   20.488   20.530
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   20.487   20.530
 ot_scf_mini                         99  9.5    0.002    0.002   19.254   19.268
 multiply_cannon                   2055 13.4    0.249    0.268   17.285   18.578
 multiply_cannon_loop              2055 14.4    0.322    0.336   15.952   16.272
 rebuild_ks_matrix                  110  8.3    0.000    0.000   11.406   11.424
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   11.406   11.423
 ot_mini                             99 10.5    0.001    0.001   10.380   10.390
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.152   10.168
 multiply_cannon_multrec           8220 15.4    3.225    4.495    7.435    8.403
 init_scf_loop                       11  6.9    0.000    0.000    7.667    7.669
 mp_waitall_1                    106626 16.5    6.082    7.657    6.082    7.657
 qs_ot_get_derivative                99 11.5    0.001    0.001    6.587    6.599
 sum_up_and_integrate               110 10.3    0.048    0.048    6.173    6.184
 integrate_v_rspace                 110 11.3    0.003    0.003    6.125    6.136
 prepare_preconditioner              11  7.9    0.000    0.000    6.059    6.063
 make_preconditioner                 11  8.9    0.000    0.000    6.059    6.063
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.626    5.707
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.560    5.570
 calculate_rho_elec                 110  8.6    0.115    0.116    5.559    5.569
 init_scf_run                        11  5.9    0.000    0.001    5.152    5.152
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    5.152    5.152
 qs_ot_get_p                        110 10.4    0.001    0.001    5.017    5.032
 dbcsr_mm_accdrv_process          17442 15.9    2.835    3.759    4.081    4.986
 make_m2s                          4110 13.4    0.038    0.039    4.074    4.321
 multiply_cannon_metrocomm3        8220 15.4    0.017    0.017    3.024    4.281
 make_images                       4110 14.4    0.637    0.700    3.945    4.191
 ot_diis_step                        99 11.5    0.012    0.012    3.768    3.768
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.677    3.699
 apply_single                       110 13.6    0.000    0.000    3.677    3.698
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.650    3.654
 grid_integrate_task_list           110 12.3    3.366    3.539    3.366    3.539
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.342    3.342
 pw_transfer                       1331 11.6    0.066    0.071    3.112    3.133
 multiply_cannon_sync_h2d          8220 15.4    2.912    3.028    2.912    3.028
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.001    3.027
 cp_fm_cholesky_invert               11 10.9    2.930    2.934    2.930    2.934
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.794    2.794
 qs_energies_init_hamiltonians       11  5.9    0.038    0.114    2.787    2.788
 cp_fm_redistribute_end              48 14.0    0.709    2.762    0.713    2.763
 density_rs2pw                      110  9.6    0.004    0.004    2.646    2.738
 cp_fm_diag_elpa_base                48 14.0    1.867    2.568    2.042    2.728
 make_images_data                  4110 15.4    0.038    0.043    2.279    2.674
 wfi_extrapolate                     11  7.9    0.001    0.001    2.669    2.669
 hybrid_alltoall_any               4261 16.3    0.200    0.865    2.216    2.593
 fft_wrap_pw1pw2_140                451 13.1    0.213    0.216    2.560    2.589
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.537    2.539
 calculate_dm_sparse                110  9.5    0.001    0.001    2.469    2.511
 grid_collocate_task_list           110  9.6    2.330    2.501    2.330    2.501
 rs_pw_transfer                     902 11.9    0.010    0.010    2.345    2.461
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.396    2.398
 fft3d_ps                          1111 14.6    1.113    1.147    2.258    2.285
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.146    2.153
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.789    2.001
 potential_pw2rs                    110 12.3    0.015    0.016    1.989    2.000
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    1.882    1.890
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    1.812    1.818
 mp_alltoall_d11v                  2046 13.8    1.557    1.781    1.557    1.781
 cp_fm_cholesky_decompose            22 10.9    1.683    1.705    1.683    1.705
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.499    1.616
 dbcsr_complete_redistribute        325 12.2    0.600    0.634    1.494    1.599
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.586    1.589
 mp_allgather_i34                  2055 14.4    0.518    1.497    0.518    1.497
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.437    1.449
 qs_create_task_list                 11  7.9    0.001    0.001    1.226    1.326
 generate_qs_task_list               11  8.9    0.380    0.448    1.226    1.325
 jit_kernel_multiply                  7 15.8    0.933    1.196    0.933    1.196
 mp_waitany                        9240 13.8    1.031    1.152    1.031    1.152
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.128    1.143
 multiply_cannon_metrocomm4        6165 15.4    0.017    0.019    0.482    1.126
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=54.597000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=770.545455, yerr=10.439119
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.191600E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67104.
 MP_Allreduce         9672                    819.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_ISendRecv         4620                 360267.
 MP_Wait              7524
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.028    0.046   86.094   86.094
 qs_mol_dyn_low                       1  2.0    0.003    0.004   85.733   85.741
 qs_forces                           11  3.9    0.002    0.002   85.662   85.665
 qs_energies                         11  4.9    0.001    0.002   81.601   81.604
 scf_env_do_scf                      11  5.9    0.001    0.001   71.468   71.469
 velocity_verlet                     10  3.0    0.002    0.002   54.922   54.928
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   42.869   42.870
 dbcsr_multiply_generic            2055 12.4    0.119    0.121   29.188   29.209
 init_scf_loop                       11  6.9    0.000    0.000   28.521   28.522
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.570   26.599
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.569   26.598
 prepare_preconditioner              11  7.9    0.000    0.000   26.579   26.585
 make_preconditioner                 11  8.9    0.000    0.000   26.579   26.585
 make_full_inverse_cholesky          11  9.9    0.000    0.000   20.772   26.037
 ot_scf_mini                         99  9.5    0.002    0.002   24.832   24.862
 multiply_cannon                   2055 13.4    0.345    0.386   22.035   22.863
 multiply_cannon_loop              2055 14.4    0.341    0.345   20.272   20.694
 cp_fm_upper_to_full                 70 14.2   12.698   18.212   12.698   18.212
 ot_mini                             99 10.5    0.001    0.001   13.896   13.913
 rebuild_ks_matrix                  110  8.3    0.000    0.001   13.179   13.205
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.013   13.178   13.205
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.980   12.004
 dbcsr_complete_redistribute        325 12.2    1.050    1.069    7.427   10.563
 multiply_cannon_multrec           8220 15.4    4.597    4.863   10.015   10.115
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.371    9.513
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.313    9.341
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    5.791    8.913
 mp_waitall_1                     87304 16.6    8.035    8.868    8.035    8.868
 mp_alltoall_i22                    605 13.7    5.401    8.541    5.401    8.541
 sum_up_and_integrate               110 10.3    0.090    0.091    6.560    6.576
 integrate_v_rspace                 110 11.3    0.003    0.003    6.470    6.487
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.356    6.395
 calculate_rho_elec                 110  8.6    0.227    0.227    6.355    6.394
 init_scf_run                        11  5.9    0.000    0.001    5.872    5.872
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    5.871    5.872
 make_m2s                          4110 13.4    0.042    0.043    5.375    5.862
 qs_ot_get_p                        110 10.4    0.001    0.001    5.730    5.750
 make_images                       4110 14.4    0.876    0.916    5.189    5.675
 dbcsr_mm_accdrv_process          11614 15.7    3.176    3.642    5.274    5.502
 cp_fm_cholesky_invert               11 10.9    5.480    5.485    5.480    5.485
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.018    4.838    5.375
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.684    5.122
 apply_single                       110 13.6    0.000    0.000    4.684    5.122
 ot_diis_step                        99 11.5    0.015    0.016    4.557    4.558
 qs_ot_p2m_diag                      48 11.0    0.151    0.156    4.149    4.156
 multiply_cannon_sync_h2d          8220 15.4    3.948    3.953    3.948    3.953
 pw_transfer                       1331 11.6    0.073    0.073    3.731    3.739
 grid_integrate_task_list           110 12.3    3.659    3.708    3.659    3.708
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.703    3.703
 hybrid_alltoall_any               4261 16.3    0.257    0.554    2.937    3.699
 make_images_data                  4110 15.4    0.042    0.045    3.024    3.656
 qs_energies_init_hamiltonians       11  5.9    0.002    0.003    3.649    3.651
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.009    3.607    3.615
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.934    3.380
 wfi_extrapolate                     11  7.9    0.001    0.001    3.244    3.245
 calculate_dm_sparse                110  9.5    0.001    0.001    3.207    3.233
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.144    3.145
 cp_fm_diag_elpa_base                48 14.0    2.595    2.796    3.142    3.143
 fft_wrap_pw1pw2_140                451 13.1    0.222    0.223    3.128    3.135
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.033    3.036
 density_rs2pw                      110  9.6    0.004    0.004    3.002    3.023
 fft3d_ps                          1111 14.6    1.259    1.273    2.810    2.819
 grid_collocate_task_list           110  9.6    2.624    2.649    2.624    2.649
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.543    2.554
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.521    2.521
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.379    2.387
 rs_pw_transfer                     902 11.9    0.011    0.011    2.197    2.237
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    2.173    2.231
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.103    2.194
 potential_pw2rs                    110 12.3    0.021    0.021    2.066    2.071
 cp_fm_cholesky_decompose            22 10.9    2.058    2.070    2.058    2.070
 jit_kernel_multiply                 10 15.5    1.894    2.014    1.894    2.014
 qs_create_task_list                 11  7.9    0.000    0.001    1.898    1.943
 generate_qs_task_list               11  8.9    0.738    0.790    1.898    1.942
 mp_alltoall_d11v                  2046 13.8    1.803    1.850    1.803    1.850
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.802    1.806
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.769    1.784
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.721    1.733
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=86.094000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1112.818182, yerr=24.154119
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             626.249728E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175955383376
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4085                  56760.
 MP_Allreduce        11253                    785.
 MP_Sync               170
 MP_Alltoall          2226                2855966.
 MP_ISendRecv        48640                  18752.
 MP_Wait             66796
 MP_comm_split          83
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.033    0.119  202.209  202.210
 qs_mol_dyn_low                       1  2.0    0.003    0.004  201.662  201.674
 qs_forces                           11  3.9    0.003    0.004  201.577  201.578
 qs_energies                         11  4.9    0.001    0.003  196.039  196.055
 scf_env_do_scf                      11  5.9    0.001    0.001  179.623  179.627
 scf_env_do_scf_inner_loop          117  6.6    0.002    0.007  159.262  159.264
 dbcsr_multiply_generic            2507 12.6    0.175    0.179  123.629  124.562
 velocity_verlet                     10  3.0    0.001    0.002  121.597  121.598
 qs_scf_new_mos                     117  7.6    0.001    0.001  120.268  120.584
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  120.267  120.583
 ot_scf_mini                        117  9.6    0.003    0.004  113.695  113.978
 multiply_cannon                   2507 13.6    0.237    0.244  100.538  103.081
 multiply_cannon_loop              2507 14.6    2.105    2.168   98.341  100.365
 ot_mini                            117 10.6    0.001    0.002   65.336   65.675
 multiply_cannon_multrec          60168 15.6   33.116   35.749   41.513   43.356
 qs_ot_get_derivative               117 11.6    0.001    0.002   40.470   40.745
 rebuild_ks_matrix                  128  8.3    0.001    0.001   33.564   33.813
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.021   33.563   33.813
 mp_waitall_1                    291448 16.2   28.069   31.910   28.069   31.910
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.191   30.413
 multiply_cannon_sync_h2d         60168 15.6   27.335   30.132   27.335   30.132
 qs_ot_get_p                        128 10.4    0.001    0.001   26.593   26.928
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.337   24.992
 apply_single                       128 13.6    0.001    0.001   24.337   24.992
 ot_diis_step                       117 11.6    0.008    0.011   24.526   24.527
 init_scf_loop                       11  6.9    0.000    0.000   20.286   20.288
 qs_ot_p2m_diag                      83 11.4    0.077    0.091   19.956   20.028
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   18.893   19.102
 multiply_cannon_metrocomm3       60168 15.6    0.109    0.115   15.521   17.920
 cp_dbcsr_syevd                      83 12.4    0.004    0.005   17.384   17.385
 prepare_preconditioner              11  7.9    0.000    0.000   15.700   15.738
 make_preconditioner                 11  8.9    0.000    0.000   15.700   15.738
 make_full_inverse_cholesky          11  9.9    0.000    0.000   14.942   15.118
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   14.613   14.617
 cp_fm_redistribute_end              83 14.4   11.545   14.548   11.556   14.550
 cp_fm_diag_elpa_base                83 14.4    2.953   14.263    2.984   14.378
 sum_up_and_integrate               128 10.3    0.052    0.064   14.158   14.176
 integrate_v_rspace                 128 11.3    0.003    0.004   14.105   14.125
 make_m2s                          5014 13.6    0.102    0.109   13.680   14.013
 make_images                       5014 14.6    0.403    0.423   13.501   13.846
 qs_rho_update_rho_low              128  7.7    0.001    0.001   12.705   12.800
 calculate_rho_elec                 128  8.7    0.045    0.064   12.705   12.799
 init_scf_run                        11  5.9    0.000    0.001   12.208   12.209
 scf_env_initial_rho_setup           11  6.9    0.000    0.002   12.208   12.208
 mp_sum_l                          7870 13.0    8.177    9.319    8.177    9.319
 wfi_extrapolate                     11  7.9    0.001    0.003    8.982    8.982
 cp_fm_cholesky_invert               11 10.9    8.937    8.945    8.937    8.945
 dbcsr_mm_accdrv_process         124484 16.2    3.319    3.503    7.962    8.496
 calculate_dm_sparse                128  9.5    0.001    0.001    8.367    8.485
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    7.813    7.962
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    7.769    7.886
 multiply_cannon_metrocomm1       60168 15.6    0.087    0.091    5.880    7.839
 make_images_data                  5014 15.6    0.069    0.075    6.760    7.690
 grid_integrate_task_list           128 12.3    7.085    7.490    7.085    7.490
 hybrid_alltoall_any               5200 16.5    0.290    2.251    5.919    7.365
 density_rs2pw                      128  9.7    0.006    0.007    6.522    7.084
 pw_transfer                       1547 11.6    0.075    0.104    6.666    6.969
 rs_pw_transfer                    1046 11.9    0.017    0.019    6.225    6.870
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.013    6.452    6.735
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.659    6.668
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.826    5.934
 fft_wrap_pw1pw2_140                523 13.2    0.438    0.492    5.608    5.788
 fft3d_ps                          1291 14.7    2.094    2.601    5.252    5.495
 mp_alltoall_d11v                  2415 14.1    4.124    5.147    4.124    5.147
 grid_collocate_task_list           128  9.7    4.723    5.132    4.723    5.132
 potential_pw2rs                    128 12.3    0.009    0.011    4.729    4.749
 cp_fm_cholesky_decompose            22 10.9    4.591    4.606    4.591    4.606
 mp_sum_d                          4464 12.1    3.558    4.330    3.558    4.330
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=202.210000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=593.363636, yerr=6.984043
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.183246E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               5975232       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.7
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             825.200640E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2406720
 MPI messages size (bytes):
  total size                         4.100942E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.703955E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               70860               2317615104
     32768 < size <=   131072              722992              55511613440
    131072 < size <=  4194304             1375664            1398181724160
   4194304 < size <= 16777216              154704            1463834332048
  16777216 < size                           67584            1181116006400
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4075                  57341.
 MP_Allreduce        11227                    947.
 MP_Sync               170
 MP_Alltoall          1969                6320497.
 MP_ISendRecv        24064                  47072.
 MP_Wait             37948
 MP_comm_split          83
 MP_ISend            11748                 212467.
 MP_IRecv            11748                 212467.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.114    0.255  189.238  189.239
 qs_mol_dyn_low                       1  2.0    0.003    0.005  188.601  188.634
 qs_forces                           11  3.9    0.003    0.006  188.514  188.515
 qs_energies                         11  4.9    0.002    0.005  181.745  181.755
 scf_env_do_scf                      11  5.9    0.001    0.001  165.446  165.455
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  132.744  132.746
 velocity_verlet                     10  3.0    0.002    0.002  118.970  118.971
 dbcsr_multiply_generic            2507 12.6    0.184    0.188   97.220   98.302
 qs_scf_new_mos                     117  7.6    0.001    0.001   93.946   94.365
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   93.945   94.365
 ot_scf_mini                        117  9.6    0.004    0.005   89.163   89.647
 multiply_cannon                   2507 13.6    0.475    0.532   77.195   81.862
 multiply_cannon_loop              2507 14.6    1.256    1.292   74.012   76.401
 ot_mini                            117 10.6    0.001    0.001   49.370   49.838
 mp_waitall_1                    226760 16.4   24.553   37.977   24.553   37.977
 multiply_cannon_multrec          30084 15.6   22.206   26.882   31.820   36.700
 rebuild_ks_matrix                  128  8.3    0.001    0.001   33.012   33.550
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.022   33.012   33.550
 init_scf_loop                       11  6.9    0.000    0.000   32.613   32.614
 qs_ks_update_qs_env                128  7.6    0.001    0.001   29.747   30.241
 multiply_cannon_metrocomm3       30084 15.6    0.096    0.100   15.365   28.406
 prepare_preconditioner              11  7.9    0.000    0.000   28.207   28.254
 make_preconditioner                 11  8.9    0.000    0.001   28.207   28.254
 qs_ot_get_derivative               117 11.6    0.001    0.002   27.535   28.011
 make_full_inverse_cholesky          11  9.9    0.000    0.000   26.899   27.435
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   21.966   23.062
 apply_single                       128 13.6    0.001    0.001   21.966   23.061
 qs_ot_get_p                        128 10.4    0.001    0.001   21.520   22.126
 ot_diis_step                       117 11.6    0.014    0.018   21.666   21.668
 multiply_cannon_sync_h2d         30084 15.6   19.417   21.644   19.417   21.644
 qs_ot_p2m_diag                      83 11.4    0.188    0.226   16.782   16.825
 cp_fm_cholesky_invert               11 10.9   16.458   16.470   16.458   16.470
 make_m2s                          5014 13.6    0.088    0.095   14.402   15.955
 make_images                       5014 14.6    1.159    1.342   14.196   15.751
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   15.648   15.649
 sum_up_and_integrate               128 10.3    0.062    0.072   14.322   14.357
 integrate_v_rspace                 128 11.3    0.003    0.004   14.260   14.298
 qs_rho_update_rho_low              128  7.7    0.001    0.001   12.936   12.970
 calculate_rho_elec                 128  8.7    0.088    0.105   12.935   12.969
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   12.614   12.621
 cp_fm_redistribute_end              83 14.4    7.385   12.556    7.397   12.557
 cp_fm_diag_elpa_base                83 14.4    4.925   12.000    5.144   12.464
 init_scf_run                        11  5.9    0.000    0.001   11.421   11.422
 scf_env_initial_rho_setup           11  6.9    0.000    0.002   11.421   11.422
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   10.805   11.159
 make_images_data                  5014 15.6    0.067    0.074    8.853   10.760
 multiply_cannon_metrocomm4       27577 15.6    0.096    0.111    3.701   10.369
 hybrid_alltoall_any               5200 16.5    0.342    1.499    7.482   10.105
 mp_irecv_dv                      69486 16.3    3.509    9.996    3.509    9.996
 dbcsr_mm_accdrv_process          62242 16.2    4.421    5.092    9.074    9.681
 wfi_extrapolate                     11  7.9    0.001    0.003    8.325    8.325
 pw_transfer                       1547 11.6    0.083    0.101    7.658    7.716
 grid_integrate_task_list           128 12.3    7.152    7.525    7.152    7.525
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    7.429    7.486
 density_rs2pw                      128  9.7    0.006    0.006    6.736    7.303
 cp_fm_cholesky_decompose            22 10.9    6.906    6.985    6.906    6.985
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    6.136    6.874
 fft_wrap_pw1pw2_140                523 13.2    0.461    0.499    6.531    6.584
 calculate_dm_sparse                128  9.5    0.001    0.001    6.435    6.561
 rs_pw_transfer                    1046 11.9    0.014    0.017    5.794    6.350
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.141    6.152
 fft3d_ps                          1291 14.7    2.779    2.931    5.801    5.842
 mp_sum_l                          7870 13.0    3.973    5.766    3.973    5.766
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.314    5.434
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    5.296    5.363
 grid_collocate_task_list           128  9.7    4.910    5.275    4.910    5.275
 mp_allgather_i34                  2507 14.6    1.756    5.188    1.756    5.188
 potential_pw2rs                    128 12.3    0.016    0.018    4.839    4.858
 mp_alltoall_d11v                  2415 14.1    4.081    4.613    4.081    4.613
 mp_sum_d                          4459 12.1    2.595    3.901    2.595    3.901
 dbcsr_complete_redistribute        395 12.7    0.781    0.862    3.070    3.892
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=189.239000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=786.090909, yerr=2.874798
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.928533E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks               3984192       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1695.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             929.529856E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1042912
 MPI messages size (bytes):
  total size                         2.716210E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.604448E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              281856              36943429632
    131072 < size <=  4194304              660064             996105256960
   4194304 < size <= 16777216               65632             931531265168
  16777216 < size                           28672             751619276800
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4103                  56951.
 MP_Allreduce        11296                    984.
 MP_Sync               170
 MP_Alltoall          1712                9388896.
 MP_ISendRecv        15872                  75008.
 MP_Wait             29756
 MP_comm_split          83
 MP_ISend            11748                 275205.
 MP_IRecv            11748                 275205.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.049    0.109  177.617  177.618
 qs_mol_dyn_low                       1  2.0    0.003    0.004  177.145  177.158
 qs_forces                           11  3.9    0.003    0.003  177.048  177.052
 qs_energies                         11  4.9    0.003    0.008  170.487  170.496
 scf_env_do_scf                      11  5.9    0.001    0.002  154.801  154.804
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.009  119.584  119.585
 velocity_verlet                     10  3.0    0.002    0.002  113.642  113.644
 dbcsr_multiply_generic            2507 12.6    0.182    0.187   82.284   83.466
 qs_scf_new_mos                     117  7.6    0.001    0.001   82.542   82.881
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   82.542   82.880
 ot_scf_mini                        117  9.6    0.004    0.004   78.387   78.748
 multiply_cannon                   2507 13.6    0.496    0.517   61.960   66.349
 multiply_cannon_loop              2507 14.6    0.862    0.891   58.852   61.565
 ot_mini                            117 10.6    0.001    0.002   42.949   43.330
 mp_waitall_1                    178456 16.5   26.192   35.273   26.192   35.273
 init_scf_loop                       11  6.9    0.000    0.000   35.113   35.114
 prepare_preconditioner              11  7.9    0.000    0.000   31.098   31.152
 make_preconditioner                 11  8.9    0.000    0.001   31.098   31.152
 rebuild_ks_matrix                  128  8.3    0.001    0.001   30.465   30.913
 qs_ks_build_kohn_sham_matrix       128  9.3    0.016    0.018   30.465   30.913
 make_full_inverse_cholesky          11  9.9    0.000    0.000   28.725   30.098
 qs_ks_update_qs_env                128  7.6    0.001    0.001   27.505   27.920
 multiply_cannon_metrocomm3       20056 15.6    0.058    0.062   15.724   24.799
 multiply_cannon_multrec          20056 15.6   13.341   16.056   22.015   24.788
 qs_ot_get_derivative               117 11.6    0.001    0.002   22.933   23.300
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   20.125   21.153
 apply_single                       128 13.6    0.001    0.001   20.125   21.152
 qs_ot_get_p                        128 10.4    0.001    0.002   20.648   21.084
 ot_diis_step                       117 11.6    0.018    0.021   19.903   19.904
 qs_ot_p2m_diag                      83 11.4    0.265    0.272   16.374   16.391
 make_m2s                          5014 13.6    0.080    0.085   15.209   16.130
 make_images                       5014 14.6    1.186    1.291   14.978   15.897
 multiply_cannon_sync_h2d         20056 15.6   14.110   15.744   14.110   15.744
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   15.339   15.341
 cp_fm_cholesky_invert               11 10.9   14.605   14.613   14.605   14.613
 sum_up_and_integrate               128 10.3    0.072    0.076   14.257   14.282
 integrate_v_rspace                 128 11.3    0.003    0.003   14.185   14.210
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.190   13.223
 calculate_rho_elec                 128  8.7    0.132    0.146   13.189   13.223
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   12.147   12.148
 cp_fm_redistribute_end              83 14.4    4.591   12.068    4.606   12.070
 cp_fm_diag_elpa_base                83 14.4    7.032   11.447    7.444   11.960
 make_images_data                  5014 15.6    0.061    0.069    9.488   10.905
 init_scf_run                        11  5.9    0.000    0.001   10.561   10.561
 scf_env_initial_rho_setup           11  6.9    0.001    0.006   10.561   10.561
 hybrid_alltoall_any               5200 16.5    0.431    1.967    8.153    9.640
 multiply_cannon_metrocomm4       17549 15.6    0.063    0.072    3.526    9.521
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.012    9.275
 mp_irecv_dv                      50230 16.2    3.402    9.271    3.402    9.271
 dbcsr_mm_accdrv_process          41502 16.2    4.303    4.917    8.141    8.255
 pw_transfer                       1547 11.6    0.083    0.102    7.794    7.907
 grid_integrate_task_list           128 12.3    7.285    7.758    7.285    7.758
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    7.569    7.688
 cp_fm_upper_to_full                105 14.5    5.759    7.463    5.759    7.463
 wfi_extrapolate                     11  7.9    0.001    0.001    7.415    7.415
 cp_fm_cholesky_decompose            22 10.9    7.277    7.314    7.277    7.314
 density_rs2pw                      128  9.7    0.005    0.006    6.644    7.060
 fft_wrap_pw1pw2_140                523 13.2    0.470    0.510    6.630    6.752
 dbcsr_complete_redistribute        395 12.7    1.196    1.234    4.690    6.463
 fft3d_ps                          1291 14.7    2.698    2.911    5.844    5.928
 rs_pw_transfer                    1046 11.9    0.013    0.014    5.417    5.863
 calculate_dm_sparse                128  9.5    0.001    0.001    5.763    5.859
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.530    5.534
 grid_collocate_task_list           128  9.7    5.072    5.482    5.072    5.482
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.547    5.248
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.465    5.236
 mp_alltoall_d11v                  2415 14.1    4.382    4.976    4.382    4.976
 mp_allgather_i34                  2507 14.6    1.632    4.874    1.632    4.874
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.593    4.748
 potential_pw2rs                    128 12.3    0.021    0.023    4.676    4.687
 mp_sum_l                          7870 13.0    3.320    4.682    3.320    4.682
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    2.354    4.101
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    4.067    4.098
 mp_alltoall_i22                    716 14.1    1.959    3.911    1.959    3.911
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    3.829    3.830
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=177.618000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=882.636364, yerr=8.657868
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.353791E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks               5977344       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               1.148670E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1143192
 MPI messages size (bytes):
  total size                         2.023815E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.770320E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              319024              36042702848
    131072 < size <=  4194304              715736             785529176064
   4194304 < size <= 16777216               70320             665379475120
  16777216 < size                           30720             536870912000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4085                  57194.
 MP_Allreduce        11251                   1067.
 MP_Sync               170
 MP_Alltoall          1712               12503107.
 MP_ISendRecv        11776                  75008.
 MP_Wait             28330
 MP_comm_split          83
 MP_ISend            14952                 244818.
 MP_IRecv            14952                 244818.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.030    0.076  188.888  188.889
 qs_mol_dyn_low                       1  2.0    0.003    0.004  188.490  188.502
 qs_forces                           11  3.9    0.003    0.003  188.379  188.392
 qs_energies                         11  4.9    0.002    0.003  181.283  181.296
 scf_env_do_scf                      11  5.9    0.001    0.001  164.273  164.286
 velocity_verlet                     10  3.0    0.002    0.002  125.408  125.411
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  117.114  117.117
 qs_scf_new_mos                     117  7.6    0.001    0.001   81.041   81.352
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   81.040   81.352
 dbcsr_multiply_generic            2507 12.6    0.190    0.194   79.954   80.651
 ot_scf_mini                        117  9.6    0.003    0.004   76.508   76.815
 multiply_cannon                   2507 13.6    0.559    0.586   54.836   58.388
 multiply_cannon_loop              2507 14.6    1.180    1.210   51.094   52.872
 init_scf_loop                       11  6.9    0.000    0.000   47.023   47.027
 prepare_preconditioner              11  7.9    0.000    0.000   42.912   42.942
 make_preconditioner                 11  8.9    0.000    0.001   42.912   42.942
 ot_mini                            117 10.6    0.001    0.001   42.570   42.889
 make_full_inverse_cholesky          11  9.9    0.000    0.000   36.296   41.340
 multiply_cannon_multrec          30084 15.6   14.044   19.697   25.943   30.985
 rebuild_ks_matrix                  128  8.3    0.001    0.001   29.423   29.746
 qs_ks_build_kohn_sham_matrix       128  9.3    0.016    0.019   29.423   29.746
 mp_waitall_1                    153770 16.5   18.193   27.586   18.193   27.586
 qs_ks_update_qs_env                128  7.6    0.001    0.001   26.605   26.900
 qs_ot_get_derivative               117 11.6    0.001    0.002   22.851   23.154
 make_m2s                          5014 13.6    0.094    0.099   20.691   21.834
 make_images                       5014 14.6    1.960    2.296   20.389   21.533
 qs_ot_get_p                        128 10.4    0.001    0.002   19.994   20.322
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   19.044   19.607
 apply_single                       128 13.6    0.001    0.001   19.044   19.607
 ot_diis_step                       117 11.6    0.018    0.019   19.590   19.591
 cp_fm_upper_to_full                105 14.7   11.232   16.612   11.232   16.612
 cp_fm_cholesky_invert               11 10.9   16.322   16.332   16.322   16.332
 qs_ot_p2m_diag                      83 11.4    0.343    0.390   15.849   15.901
 multiply_cannon_metrocomm3       30084 15.6    0.047    0.049    6.649   15.024
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   14.557   14.558
 sum_up_and_integrate               128 10.3    0.079    0.087   14.165   14.194
 integrate_v_rspace                 128 11.3    0.003    0.003   14.086   14.116
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.348   13.385
 calculate_rho_elec                 128  8.7    0.174    0.190   13.347   13.384
 dbcsr_complete_redistribute        395 12.7    1.541    1.657    9.426   13.236
 make_images_data                  5014 15.6    0.064    0.069   11.279   13.218
 multiply_cannon_sync_h2d         30084 15.6   11.678   12.694   11.678   12.694
 hybrid_alltoall_any               5200 16.5    0.526    2.205    9.975   12.346
 dbcsr_mm_accdrv_process          62264 16.2    7.392    8.233   11.470   11.991
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    7.985   11.794
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   11.319   11.321
 cp_fm_redistribute_end              83 14.4    1.969   11.250    1.984   11.255
 cp_fm_diag_elpa_base                83 14.4    8.648   10.618    9.240   11.135
 init_scf_run                        11  5.9    0.000    0.001   10.969   10.971
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   10.969   10.970
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    6.593   10.292
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.498    9.719
 mp_alltoall_i22                    716 14.1    5.649    9.430    5.649    9.430
 pw_transfer                       1547 11.6    0.083    0.097    7.926    8.007
 grid_integrate_task_list           128 12.3    7.524    7.844    7.524    7.844
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    7.698    7.783
 cp_fm_cholesky_decompose            22 10.9    7.637    7.735    7.637    7.735
 wfi_extrapolate                     11  7.9    0.001    0.001    7.726    7.726
 multiply_cannon_metrocomm4       25070 15.6    0.075    0.085    2.823    7.410
 mp_irecv_dv                      76098 16.2    2.679    7.143    2.679    7.143
 fft_wrap_pw1pw2_140                523 13.2    0.472    0.483    6.823    6.931
 density_rs2pw                      128  9.7    0.005    0.006    6.414    6.788
 calculate_dm_sparse                128  9.5    0.001    0.001    6.201    6.281
 fft3d_ps                          1291 14.7    2.798    2.887    5.939    6.011
 mp_alltoall_d11v                  2415 14.1    5.287    5.955    5.287    5.955
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.537    5.582
 grid_collocate_task_list           128  9.7    5.209    5.554    5.209    5.554
 rs_pw_transfer                    1046 11.9    0.013    0.014    4.903    5.277
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.426    4.558
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.422    4.515
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    4.505    4.505
 potential_pw2rs                    128 12.3    0.024    0.024    4.377    4.388
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    4.272    4.325
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=188.889000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1082.272727, yerr=21.011213
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.865088E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               1960712       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3445.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               1.521656E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  240672
 MPI messages size (bytes):
  total size                         1.331455E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.532237E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              113904              59718500352
   4194304 < size <= 16777216              104976             550376570880
  16777216 < size                           20208             721350092304
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8931                     51.
 MP_Alltoall          9654                 799394.
 MP_ISend            40068                2102572.
 MP_IRecv            40068                2101675.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  58352.
 MP_Allreduce        10977                   1175.
 MP_Sync                87
 MP_Alltoall          1712               18838210.
 MP_ISendRecv         7680                 122880.
 MP_Wait             19962
 MP_ISend            10680                 423556.
 MP_IRecv            10680                 423556.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.033  171.052  171.052
 qs_mol_dyn_low                       1  2.0    0.003    0.003  170.668  170.680
 qs_forces                           11  3.9    0.003    0.003  170.567  170.571
 qs_energies                         11  4.9    0.001    0.002  163.255  163.263
 scf_env_do_scf                      11  5.9    0.001    0.001  145.824  145.839
 velocity_verlet                     10  3.0    0.002    0.002  112.180  112.184
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  109.831  109.832
 qs_scf_new_mos                     117  7.6    0.001    0.001   74.554   74.645
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   74.553   74.644
 dbcsr_multiply_generic            2507 12.6    0.180    0.186   73.751   74.037
 ot_scf_mini                        117  9.6    0.004    0.004   70.150   70.189
 multiply_cannon                   2507 13.6    0.584    0.616   54.507   58.680
 multiply_cannon_loop              2507 14.6    0.447    0.458   49.755   50.239
 ot_mini                            117 10.6    0.001    0.001   39.407   39.447
 init_scf_loop                       11  6.9    0.000    0.000   35.837   35.840
 mp_waitall_1                    129618 16.6   25.297   32.914   25.297   32.914
 prepare_preconditioner              11  7.9    0.000    0.000   31.992   32.020
 make_preconditioner                 11  8.9    0.000    0.000   31.992   32.020
 make_full_inverse_cholesky          11  9.9    0.000    0.000   29.813   30.088
 rebuild_ks_matrix                  128  8.3    0.001    0.001   28.347   28.439
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.017   28.346   28.438
 qs_ks_update_qs_env                128  7.6    0.001    0.001   25.857   25.937
 multiply_cannon_multrec          10028 15.6   10.477   14.421   17.997   20.677
 ot_diis_step                       117 11.6    0.019    0.020   19.808   19.809
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   19.416   19.663
 apply_single                       128 13.6    0.001    0.001   19.416   19.663
 qs_ot_get_derivative               117 11.6    0.002    0.002   19.518   19.561
 multiply_cannon_metrocomm3       10028 15.6    0.022    0.023   12.148   18.552
 cp_fm_cholesky_invert               11 10.9   18.250   18.256   18.250   18.256
 make_m2s                          5014 13.6    0.066    0.070   15.639   18.164
 qs_ot_get_p                        128 10.4    0.001    0.001   17.952   18.014
 make_images                       5014 14.6    2.317    2.860   15.335   17.861
 qs_ot_p2m_diag                      83 11.4    0.495    0.501   14.282   14.299
 sum_up_and_integrate               128 10.3    0.103    0.109   13.870   13.920
 integrate_v_rspace                 128 11.3    0.003    0.004   13.766   13.821
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   13.128   13.129
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.022   13.054
 calculate_rho_elec                 128  8.7    0.257    0.268   13.021   13.053
 multiply_cannon_sync_h2d         10028 15.6   11.660   12.118   11.660   12.118
 make_images_data                  5014 15.6    0.053    0.060    9.412   11.976
 hybrid_alltoall_any               5200 16.5    0.812    3.550    9.203   11.841
 init_scf_run                        11  5.9    0.000    0.001   10.562   10.562
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   10.562   10.562
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   10.097   10.103
 cp_fm_diag_elpa_base                83 14.4    9.845    9.932   10.089   10.096
 cp_fm_cholesky_decompose            22 10.9    7.997    8.107    7.997    8.107
 grid_integrate_task_list           128 12.3    7.796    8.031    7.796    8.031
 qs_ot_get_derivative_diag           77 12.4    0.002    0.003    7.797    7.827
 dbcsr_mm_accdrv_process          20762 16.1    2.636    3.454    7.146    7.776
 pw_transfer                       1547 11.6    0.082    0.091    7.453    7.476
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.010    7.227    7.255
 wfi_extrapolate                     11  7.9    0.001    0.001    7.227    7.227
 multiply_cannon_metrocomm1       10028 15.6    0.029    0.030    4.245    6.976
 mp_allgather_i34                  2507 14.6    2.953    6.911    2.953    6.911
 density_rs2pw                      128  9.7    0.005    0.006    5.896    6.386
 fft_wrap_pw1pw2_140                523 13.2    0.493    0.508    6.342    6.379
 calculate_dm_sparse                128  9.5    0.001    0.001    6.145    6.203
 grid_collocate_task_list           128  9.7    5.474    5.864    5.474    5.864
 dbcsr_complete_redistribute        395 12.7    2.171    2.272    5.259    5.643
 fft3d_ps                          1291 14.7    2.721    2.793    5.393    5.417
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    5.277    5.277
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.202    5.209
 mp_alltoall_d11v                  2415 14.1    4.537    4.957    4.537    4.957
 rs_pw_transfer                    1046 11.9    0.013    0.013    4.285    4.791
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.124    4.156
 potential_pw2rs                    128 12.3    0.027    0.028    4.072    4.086
 multiply_cannon_metrocomm4        7521 15.6    0.024    0.026    1.852    4.038
 mp_irecv_dv                      28860 15.9    1.816    3.967    1.816    3.967
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.596    3.887
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.462    3.796
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    3.682    3.708
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    3.544    3.612
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.469    3.483
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=171.052000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1428.545455, yerr=51.706099
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430456039424       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1962800054272       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986255912960       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992003932160       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613072052224       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239176077312       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239176077312       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.233020E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.786061E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806383904       0.0%      0.0%    100.0%
 number of processed stacks               1980288       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3437.1
 marketing flops                   145.650931E+12
 -------------------------------------------------------------------------------
 # multiplications                           2529
 max memory usage/rank               2.292199E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  101160
 MPI messages size (bytes):
  total size                         1.144970E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.318403E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               45648              35433480192
   4194304 < size <= 16777216               44720             382939955200
  16777216 < size                           10176             726592466352
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4045                  58536.
 MP_Allreduce        11109                   1511.
 MP_Sync                88
 MP_Alltoall          1724               36993632.
 MP_ISendRecv         3612                 218624.
 MP_Wait             11682
 MP_ISend             6456                1080169.
 MP_IRecv             6456                1080169.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.042  292.003  292.003
 qs_mol_dyn_low                       1  2.0    0.003    0.004  291.304  291.316
 qs_forces                           11  3.9    0.003    0.003  291.218  291.220
 qs_energies                         11  4.9    0.002    0.002  282.557  282.571
 scf_env_do_scf                      11  5.9    0.001    0.001  260.988  261.006
 velocity_verlet                     10  3.0    0.002    0.002  209.802  209.809
 scf_env_do_scf_inner_loop          118  6.6    0.003    0.008  136.834  136.836
 init_scf_loop                       11  6.9    0.000    0.000  123.897  123.898
 prepare_preconditioner              11  7.9    0.000    0.000  119.169  119.204
 make_preconditioner                 11  8.9    0.000    0.000  119.169  119.204
 make_full_inverse_cholesky          11  9.9    0.000    0.000   95.347  116.275
 qs_scf_new_mos                     118  7.6    0.001    0.001   93.971   94.072
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   93.970   94.071
 ot_scf_mini                        118  9.6    0.004    0.004   89.117   89.185
 dbcsr_multiply_generic            2529 12.6    0.218    0.224   84.675   85.149
 cp_fm_upper_to_full                106 14.8   52.424   75.219   52.424   75.219
 multiply_cannon                   2529 13.6    0.699    0.756   59.966   60.395
 multiply_cannon_loop              2529 14.6    0.475    0.483   56.170   57.399
 ot_mini                            118 10.6    0.001    0.001   45.333   45.396
 dbcsr_complete_redistribute        397 12.7    4.084    4.125   29.485   42.216
 copy_fm_to_dbcsr                   210 11.7    0.001    0.002   26.073   38.812
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000   23.776   36.427
 rebuild_ks_matrix                  129  8.3    0.001    0.001   34.212   34.255
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.018   34.212   34.254
 mp_alltoall_i22                    720 14.1   21.593   34.165   21.593   34.165
 cp_fm_cholesky_invert               11 10.9   33.708   33.714   33.708   33.714
 mp_waitall_1                    106386 16.7   29.283   33.051   29.283   33.051
 qs_ks_update_qs_env                129  7.6    0.001    0.001   31.962   31.998
 qs_ot_get_p                        129 10.4    0.001    0.001   27.958   28.014
 qs_ot_get_derivative               118 11.6    0.002    0.002   24.617   24.679
 qs_ot_p2m_diag                      84 11.4    0.890    0.895   23.732   23.761
 cp_dbcsr_syevd                      84 12.4    0.006    0.006   21.957   21.960
 make_m2s                          5058 13.6    0.077    0.078   19.853   20.893
 multiply_cannon_metrocomm3       10116 15.6    0.023    0.023   19.758   20.882
 ot_diis_step                       118 11.6    0.022    0.023   20.667   20.667
 make_images                       5058 14.6    3.783    3.888   19.376   20.418
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   19.868   20.005
 apply_single                       129 13.6    0.001    0.001   19.867   20.005
 cp_fm_diag_elpa                     84 13.4    0.000    0.000   18.681   18.683
 cp_fm_diag_elpa_base                84 14.4   14.199   15.860   18.677   18.679
 multiply_cannon_multrec          10116 15.6   10.614   12.400   18.102   18.204
 sum_up_and_integrate               129 10.3    0.195    0.197   15.796   15.902
 multiply_cannon_sync_h2d         10116 15.6   15.777   15.782   15.777   15.782
 integrate_v_rspace                 129 11.3    0.004    0.004   15.600   15.707
 qs_rho_update_rho_low              129  7.7    0.001    0.001   15.196   15.210
 calculate_rho_elec                 129  8.7    0.486    0.487   15.196   15.210
 make_images_data                  5058 15.6    0.061    0.065   10.592   12.401
 hybrid_alltoall_any               5245 16.5    1.311    3.057   10.630   12.358
 init_scf_run                        11  5.9    0.000    0.001   11.700   11.701
 scf_env_initial_rho_setup           11  6.9    0.000    0.000   11.700   11.701
 qs_ot_get_derivative_diag           78 12.4    0.002    0.003    9.809    9.854
 dbcsr_mm_accdrv_process          20934 16.1    3.891    5.600    7.250    9.089
 cp_fm_cholesky_decompose            22 10.9    8.982    9.026    8.982    9.026
 grid_integrate_task_list           129 12.3    8.614    8.781    8.614    8.781
 wfi_extrapolate                     11  7.9    0.001    0.001    8.604    8.605
 pw_transfer                       1559 11.6    0.094    0.095    8.509    8.513
 fft_wrap_pw1pw2                   1301 12.7    0.011    0.011    8.268    8.273
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    7.909    7.909
 fft_wrap_pw1pw2_140                527 13.2    0.530    0.534    7.302    7.314
 mp_alltoall_d11v                  2429 14.1    6.920    7.058    6.920    7.058
 calculate_dm_sparse                129  9.5    0.001    0.001    6.717    6.792
 grid_collocate_task_list           129  9.7    6.386    6.422    6.386    6.422
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.282    6.366
 fft3d_ps                          1301 14.7    2.783    2.813    6.307    6.321
 density_rs2pw                      129  9.7    0.005    0.005    6.200    6.229
 copy_dbcsr_to_fm                   187 11.8    0.004    0.004    6.052    6.142
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=292.003000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2154.545455, yerr=54.880637
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.260167E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255669.
 MP_Allreduce         3059                   6274.
 MP_Sync                 4
 MP_Alltoall            54
 MP_ISendRecv          570                  19200.
 MP_Wait              1302
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.047    0.064   85.162   85.163
 qs_energies                          1  2.0    0.000    0.000   84.639   84.653
 ls_scf                               1  3.0    0.000    0.003   83.688   83.701
 dbcsr_multiply_generic             111  6.7    0.014    0.015   72.587   72.752
 multiply_cannon                    111  7.7    0.018    0.021   55.883   57.305
 multiply_cannon_loop               111  8.7    0.209    0.222   52.444   54.099
 ls_scf_main                          1  4.0    0.000    0.002   52.320   52.321
 density_matrix_trs4                  2  5.0    0.002    0.003   46.818   46.877
 ls_scf_init_scf                      1  4.0    0.000    0.001   28.307   28.309
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   27.219   27.276
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.002   25.065   25.094
 mp_waitall_1                     11316 10.9   22.242   24.632   22.242   24.632
 multiply_cannon_multrec           2664  9.7    8.158    8.960   15.558   17.369
 multiply_cannon_sync_h2d          2664  9.7   13.674   15.402   13.674   15.402
 make_m2s                           222  7.7    0.008    0.011   13.004   13.521
 make_images                        222  8.7    0.099    0.108   12.982   13.502
 multiply_cannon_metrocomm1        2664  9.7    0.009    0.011    9.583   11.907
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.413    8.324
 hybrid_alltoall_any                227 10.6    0.216    1.840    6.474    8.156
 make_images_data                   222  9.7    0.004    0.005    7.557    8.154
 dbcsr_mm_accdrv_process           4760 10.4    0.510    0.610    7.021    7.987
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.312    7.229    6.312    7.229
 calculate_norms                   4752  9.8    5.534    6.109    5.534    6.109
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.062    5.195
 mp_sum_l                           807  5.4    3.171    4.657    3.171    4.657
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.366    3.468
 make_images_sizes                  222  9.7    0.000    0.000    0.832    3.467
 mp_alltoall_i44                    222 10.7    0.832    3.467    0.832    3.467
 multiply_cannon_metrocomm4        2442  9.7    0.011    0.014    2.067    3.367
 mp_irecv_dv                       6231 10.9    2.049    3.338    2.049    3.338
 arnoldi_extremal                     4  6.8    0.000    0.003    3.269    3.298
 arnoldi_normal_ev                    4  7.8    0.001    0.006    3.269    3.298
 build_subspace                      16  8.4    0.009    0.013    3.168    3.171
 ls_scf_post                          1  4.0    0.000    0.003    3.060    3.073
 ls_scf_store_result                  1  5.0    0.000    0.000    2.872    2.911
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.262    2.744
 dbcsr_merge_single_wm              555 10.7    0.456    0.568    2.254    2.736
 make_images_pack                   222  9.7    2.207    2.629    2.209    2.630
 dbcsr_matrix_vector_mult           304  9.0    0.003    0.010    2.334    2.568
 dbcsr_sort_data                    658 11.4    2.055    2.492    2.055    2.492
 dbcsr_matrix_vector_mult_local     304 10.0    2.067    2.456    2.069    2.458
 ls_scf_dm_to_ks                      2  5.0    0.000    0.001    2.242    2.330
 buffer_matrices_ensure_size        222  8.7    1.752    2.104    1.752    2.104
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.780    1.784
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.770    1.774
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.007    1.770    1.774
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=85.163000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1131.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.108445E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  10339.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_ISendRecv          282                  57600.
 MP_Wait               828
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.102    0.125   91.890   91.890
 qs_energies                          1  2.0    0.000    0.000   91.166   91.172
 ls_scf                               1  3.0    0.000    0.000   89.834   89.841
 dbcsr_multiply_generic             111  6.7    0.015    0.016   75.868   76.197
 multiply_cannon                    111  7.7    0.028    0.039   53.220   57.571
 ls_scf_main                          1  4.0    0.000    0.000   55.488   55.493
 multiply_cannon_loop               111  8.7    0.116    0.123   49.991   53.466
 density_matrix_trs4                  2  5.0    0.002    0.003   49.776   50.000
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.804   30.805
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.646   29.759
 mp_waitall_1                      9246 10.9   21.345   29.285   21.345   29.285
 multiply_cannon_multrec           1332  9.7   13.099   17.075   22.409   27.510
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.235   27.246
 multiply_cannon_metrocomm3        1332  9.7    0.006    0.008   11.763   20.577
 make_m2s                           222  7.7    0.006    0.007   15.543   16.219
 make_images                        222  8.7    1.583    1.924   15.513   16.189
 dbcsr_mm_accdrv_process           4041 10.4    0.275    0.451    8.912   10.518
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.506   10.067    8.506   10.067
 make_images_data                   222  9.7    0.004    0.004    8.977    9.851
 hybrid_alltoall_any                227 10.6    0.519    2.447    8.373    9.380
 mp_sum_l                           807  5.4    5.801    8.883    5.801    8.883
 multiply_cannon_metrocomm4        1221  9.7    0.006    0.008    3.219    7.813
 mp_irecv_dv                       3311 11.0    3.199    7.760    3.199    7.760
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.508    7.142
 calculate_norms                   2376  9.8    6.001    6.655    6.001    6.655
 multiply_cannon_sync_h2d          1332  9.7    4.788    5.862    4.788    5.862
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.059    5.278
 arnoldi_extremal                     4  6.8    0.000    0.000    4.659    4.678
 arnoldi_normal_ev                    4  7.8    0.001    0.005    4.659    4.678
 build_subspace                      16  8.4    0.014    0.021    4.401    4.405
 ls_scf_post                          1  4.0    0.000    0.000    3.542    3.548
 dbcsr_matrix_vector_mult           304  9.0    0.005    0.017    3.154    3.369
 ls_scf_store_result                  1  5.0    0.000    0.000    3.246    3.360
 dbcsr_matrix_vector_mult_local     304 10.0    2.742    3.226    2.744    3.228
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.554    2.635
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.228    2.452
 mp_allgather_i34                   111  8.7    0.926    2.433    0.926    2.433
 make_images_pack                   222  9.7    2.026    2.414    2.028    2.416
 dbcsr_sort_data                    436 11.2    1.836    2.082    1.836    2.082
 dbcsr_data_new                    4174 10.1    1.612    1.844    1.612    1.844
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=91.890000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1718.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.699579E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265470.
 MP_Allreduce         3058                  11181.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_ISendRecv          186                  57600.
 MP_Wait               732
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.048    0.103   93.622   93.624
 qs_energies                          1  2.0    0.000    0.001   92.884   92.891
 ls_scf                               1  3.0    0.004    0.016   91.466   91.471
 dbcsr_multiply_generic             111  6.7    0.015    0.016   76.246   76.590
 ls_scf_main                          1  4.0    0.000    0.001   57.069   57.073
 multiply_cannon                    111  7.7    0.048    0.172   52.795   56.178
 multiply_cannon_loop               111  8.7    0.100    0.106   49.149   53.094
 density_matrix_trs4                  2  5.0    0.002    0.003   51.140   51.313
 mp_waitall_1                      7374 11.0   23.453   33.298   23.453   33.298
 ls_scf_init_scf                      1  4.0    0.000    0.001   30.736   30.738
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.548   29.647
 matrix_sqrt_Newton_Schulz            2  6.5    0.003    0.015   27.132   27.146
 multiply_cannon_multrec            888  9.7   12.634   15.331   21.256   24.561
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   10.805   22.643
 make_m2s                           222  7.7    0.006    0.007   16.680   17.998
 make_images                        222  8.7    1.973    2.306   16.642   17.957
 make_images_data                   222  9.7    0.003    0.004    9.522   10.658
 hybrid_alltoall_any                227 10.6    0.620    2.863    9.234   10.553
 dbcsr_mm_accdrv_process           3754 10.4    0.259    0.422    8.147    9.402
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.777    8.980    7.777    8.980
 mp_sum_l                           807  5.4    5.268    8.632    5.268    8.632
 multiply_cannon_sync_h2d           888  9.7    6.018    7.135    6.018    7.135
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.448    6.775
 mp_irecv_dv                       2335 11.1    2.434    6.734    2.434    6.734
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.745    6.732
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.954    6.485
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.985    5.209
 arnoldi_extremal                     4  6.8    0.000    0.000    5.121    5.149
 arnoldi_normal_ev                    4  7.8    0.002    0.016    5.121    5.149
 build_subspace                      16  8.4    0.014    0.020    4.800    4.808
 calculate_norms                   1584  9.8    4.254    4.537    4.254    4.537
 dbcsr_matrix_vector_mult           304  9.0    0.005    0.017    3.461    3.791
 mp_allgather_i34                   111  8.7    1.480    3.778    1.480    3.778
 ls_scf_post                          1  4.0    0.003    0.023    3.658    3.663
 dbcsr_matrix_vector_mult_local     304 10.0    3.028    3.604    3.030    3.606
 ls_scf_store_result                  1  5.0    0.000    0.000    3.379    3.485
 ls_scf_dm_to_ks                      2  5.0    0.000    0.001    2.772    2.881
 dbcsr_sort_data                    325 11.1    1.901    2.150    1.901    2.150
 make_images_pack                   222  9.7    1.813    2.132    1.816    2.135
 dbcsr_data_release                9322 10.9    1.299    1.963    1.299    1.963
 dbcsr_finalize                     304  7.8    0.026    0.032    1.614    1.896
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.890    1.893
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.872    1.875
 qs_ks_build_kohn_sham_matrix         3  8.3    0.002    0.007    1.872    1.875
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=93.624000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2252.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.369075E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  13371.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_ISendRecv          138                  86400.
 MP_Wait               600
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.083    0.130   97.394   97.395
 qs_energies                          1  2.0    0.000    0.000   96.723   96.730
 ls_scf                               1  3.0    0.000    0.000   95.060   95.066
 dbcsr_multiply_generic             111  6.7    0.016    0.017   78.913   79.182
 ls_scf_main                          1  4.0    0.000    0.000   59.205   59.206
 multiply_cannon                    111  7.7    0.057    0.124   51.720   56.139
 density_matrix_trs4                  2  5.0    0.002    0.003   53.109   53.262
 multiply_cannon_loop               111  8.7    0.114    0.126   46.706   50.123
 ls_scf_init_scf                      1  4.0    0.000    0.000   32.554   32.556
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   31.414   31.484
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   28.948   28.960
 mp_waitall_1                      6438 11.0   22.829   28.665   22.829   28.665
 multiply_cannon_multrec           1332  9.7   14.291   17.229   22.222   25.449
 make_m2s                           222  7.7    0.006    0.008   21.339   22.719
 make_images                        222  8.7    3.142    3.596   21.289   22.671
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.003    9.173   16.959
 make_images_data                   222  9.7    0.004    0.004   11.910   13.452
 hybrid_alltoall_any                227 10.6    0.797    3.823   11.290   12.945
 dbcsr_mm_accdrv_process           3641 10.4    0.211    0.395    7.572    9.138
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.201    8.716    7.201    8.716
 mp_sum_l                           807  5.4    4.284    8.380    4.284    8.380
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.254    6.618
 multiply_cannon_sync_h2d          1332  9.7    5.523    6.203    5.523    6.203
 multiply_cannon_metrocomm4        1110  9.7    0.004    0.006    2.077    5.996
 mp_irecv_dv                       3229 10.9    2.054    5.913    2.054    5.913
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.574    5.648
 arnoldi_extremal                     4  6.8    0.000    0.000    5.242    5.257
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.242    5.257
 build_subspace                      16  8.4    0.014    0.021    4.892    4.900
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.584    4.747
 calculate_norms                   2376  9.8    4.196    4.548    4.196    4.548
 mp_allgather_i34                   111  8.7    2.128    4.365    2.128    4.365
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.017    3.578    3.884
 dbcsr_matrix_vector_mult_local     304 10.0    3.198    3.701    3.200    3.702
 dbcsr_sort_data                    658 11.4    3.082    3.457    3.082    3.457
 ls_scf_post                          1  4.0    0.000    0.000    3.302    3.309
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.833    3.258
 dbcsr_merge_single_wm              555 10.7    0.536    0.667    2.825    3.250
 ls_scf_store_result                  1  5.0    0.000    0.000    3.011    3.102
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.919    2.962
 dbcsr_data_release               10477 10.7    1.584    2.421    1.584    2.421
 dbcsr_finalize                     304  7.8    0.049    0.061    1.801    1.976
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=97.395000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2722.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.625564E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265558.
 MP_Allreduce         3049                  15663.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_ISendRecv           90                 115200.
 MP_Wait               573
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.044    0.068   98.232   98.233
 qs_energies                          1  2.0    0.000    0.000   97.350   97.356
 ls_scf                               1  3.0    0.000    0.000   95.406   95.411
 dbcsr_multiply_generic             111  6.7    0.017    0.018   77.081   77.320
 ls_scf_main                          1  4.0    0.000    0.000   61.405   61.406
 multiply_cannon                    111  7.7    0.085    0.127   54.929   59.396
 density_matrix_trs4                  2  5.0    0.002    0.003   54.377   54.447
 multiply_cannon_loop               111  8.7    0.069    0.077   50.333   52.060
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.414   30.419
 mp_waitall_1                      5481 11.0   25.714   30.418   25.714   30.418
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.212   29.245
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.038   27.047
 multiply_cannon_multrec            444  9.7   14.012   16.552   21.125   22.870
 make_m2s                           222  7.7    0.004    0.005   17.436   20.032
 make_images                        222  8.7    3.713    4.429   17.375   19.972
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   10.742   15.832
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001    6.074   14.548
 make_images_data                   222  9.7    0.003    0.004    9.697   12.193
 hybrid_alltoall_any                227 10.6    0.789    3.772    9.443   11.967
 dbcsr_mm_accdrv_process           3003 10.4    0.164    0.325    6.821    7.954
 multiply_cannon_sync_h2d           444  9.7    6.593    7.736    6.593    7.736
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.505    7.613    6.505    7.613
 mp_allgather_i34                   111  8.7    2.830    6.983    2.830    6.983
 arnoldi_extremal                     4  6.8    0.000    0.000    5.830    5.843
 arnoldi_normal_ev                    4  7.8    0.002    0.004    5.830    5.843
 build_subspace                      16  8.4    0.015    0.020    5.437    5.447
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.629    4.806
 mp_sum_l                           807  5.4    2.759    4.564    2.759    4.564
 dbcsr_matrix_vector_mult           304  9.0    0.007    0.017    4.200    4.388
 dbcsr_matrix_vector_mult_local     304 10.0    3.730    4.203    3.732    4.205
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.599    3.914
 mp_irecv_dv                       1241 11.2    1.579    3.891    1.579    3.891
 calculate_norms                    792  9.8    3.549    3.667    3.549    3.667
 ls_scf_post                          1  4.0    0.000    0.000    3.587    3.592
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.383    3.489
 ls_scf_store_result                  1  5.0    0.000    0.000    3.356    3.416
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    1.833    3.317
 make_images_sizes                  222  9.7    0.000    0.000    1.104    3.315
 mp_alltoall_i44                    222 10.7    1.104    3.315    1.104    3.315
 dbcsr_finalize                     304  7.8    0.062    0.078    2.204    2.341
 dbcsr_merge_all                    275  8.9    0.476    0.541    2.047    2.169
 dbcsr_data_release               10123 10.8    1.335    2.009    1.335    2.009
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.992    1.993
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=98.233000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3628.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/fb438e6dbebd23a036a1436a9c376208ffa38d38_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.712258E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284111.
 MP_Allreduce         3043                  21950.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_ISendRecv           84                 732600.
 MP_Wait               309
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.079    0.100  107.875  107.876
 qs_energies                          1  2.0    0.000    0.000  106.458  106.473
 ls_scf                               1  3.0    0.000    0.000  103.522  103.536
 dbcsr_multiply_generic             111  6.7    0.023    0.027   77.493   77.618
 ls_scf_main                          1  4.0    0.000    0.000   65.869   65.870
 density_matrix_trs4                  2  5.0    0.002    0.003   56.969   57.026
 multiply_cannon                    111  7.7    0.159    0.226   50.005   51.865
 multiply_cannon_loop               111  8.7    0.067    0.069   46.494   47.522
 ls_scf_init_scf                      1  4.0    0.000    0.000   33.937   33.937
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   32.583   32.597
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   29.752   29.763
 mp_waitall_1                      4569 11.1   22.274   26.158   22.274   26.158
 make_m2s                           222  7.7    0.005    0.005   23.921   24.946
 make_images                        222  8.7    4.577    4.962   23.815   24.838
 multiply_cannon_multrec            444  9.7   17.878   18.525   22.539   23.119
 hybrid_alltoall_any                227 10.6    1.657    3.609   12.964   15.680
 make_images_data                   222  9.7    0.003    0.003   13.173   15.640
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.515   11.147
 multiply_cannon_sync_h2d           444  9.7    8.849    8.893    8.849    8.893
 arnoldi_extremal                     4  6.8    0.000    0.000    7.497    7.509
 arnoldi_normal_ev                    4  7.8    0.003    0.009    7.497    7.509
 build_subspace                      16  8.4    0.026    0.037    6.941    6.953
 dbcsr_matrix_vector_mult           304  9.0    0.009    0.026    5.574    5.726
 dbcsr_matrix_vector_mult_local     304 10.0    5.117    5.425    5.119    5.428
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.023    5.288
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    4.846    4.940
 dbcsr_mm_accdrv_process           1814 10.4    0.207    0.324    4.485    4.611
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.188    4.320    4.188    4.320
 ls_scf_post                          1  4.0    0.000    0.000    3.717    3.731
 mp_allgather_i34                   111  8.7    1.143    3.597    1.143    3.597
 make_images_sizes                  222  9.7    0.000    0.000    1.423    3.475
 mp_alltoall_i44                    222 10.7    1.423    3.474    1.423    3.474
 ls_scf_store_result                  1  5.0    0.000    0.000    3.421    3.430
 calculate_norms                    792  9.8    3.239    3.272    3.239    3.272
 dbcsr_finalize                     304  7.8    0.082    0.089    3.077    3.122
 dbcsr_complete_redistribute          5  7.6    1.478    1.515    2.811    2.948
 qs_energies_init_hamiltonians        1  3.0    0.001    0.001    2.905    2.906
 dbcsr_data_release               12724 10.6    2.323    2.905    2.323    2.905
 dbcsr_merge_all                    275  8.9    0.892    0.915    2.862    2.902
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.452    2.599
 dbcsr_sort_data                    325 11.1    2.434    2.500    2.434    2.500
 dbcsr_new_transposed                 4  7.5    0.245    0.254    2.268    2.281
 dbcsr_frobenius_norm                74  6.6    2.054    2.137    2.201    2.242
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.228    2.230
 dbcsr_add_d                        103  6.2    0.000    0.000    2.146    2.222
 dbcsr_add_anytype                  103  7.2    0.859    0.891    2.145    2.221
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.162    2.163
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.000    2.162    2.163
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=107.876000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=6835.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: fb438e6dbebd23a036a1436a9c376208ffa38d38
Summary: empty
Status: OK