=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: 41a40305d136800f944133e53e6b8ab400f7c868


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1,
#              Cray-FFTW 3.3.8.10, COSMA 2.6.6, ELPA 2023.05.001,
#              HDF5 1.14.2, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.2.2, LIBVORI 220621, LIBXSMM 1.17,
#              PLUMED 2.9.0, SPGLIB 1.16.2, LIBGRPP 20231215
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed as default.
#        Replace or adapt the "module add" commands below if needed.
#
# Arguments: The first positional parameter, if non-empty, selects the
#            gcc module version to load (default: 9.3.0).
#            The variable maxtasks is not assigned in this file, the -j
#            option of the toolchain script relies on the calling shell
#            (NOTE(review): confirm how the script treats a bare -j).
#
# Structure: This file is read by bash and by make. The comment line that
#            ends with a backslash below makes make skip all continued
#            lines up to "return", whereas bash runs them as one command
#            line. Keep every line of that section terminated by a
#            backslash and do not insert comment lines into it.
#
# Last update: 25.01.2024
#
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   if ! cd tools/toolchain; then \
      echo "ERROR: Cannot enter tools/toolchain, source ${this_file} from the directory containing tools/toolchain"; \
      return 1; \
   fi; \
   ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j${maxtasks} --no-arch-files --with-gcc=system --with-hdf5 --with-libvdwxc --with-pexsi --with-plumed --with-sirius=no; \
   cd ../..; \
   printf "Sourcing %s ... " "${PWD}/tools/toolchain/install/setup"; \
   source "${PWD}/tools/toolchain/install/setup"; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   echo; \
   return

# Set options
#
# DO_CHECKS = yes adds -D__CHECK_DIAG; USE_ACC = yes adds the GPU defines
# and selects the CUDA variants of the COSMA and SIRIUS paths.
# Every other USE_* variable holds the version of an optional package:
# a non-empty value activates the matching "ifneq ($(USE_...),)" section,
# a commented-out (empty) variable leaves the package out of the build.
# The version strings are used to build the installation directory names,
# so they have to match what is installed below INSTALL_PATH.
DO_CHECKS      := no
USE_ACC        := yes
USE_COSMA      := 2.6.6
USE_ELPA       := 2023.05.001
USE_HDF5       := 1.14.2
USE_LIBGRPP    := 20231215
USE_LIBINT     := 2.6.0
USE_LIBPEXSI   := 1.2.0
USE_LIBVORI    := 220621
USE_LIBXC      := 6.2.2
USE_LIBXSMM    := 1.17
USE_PLUMED     := 2.9.0
#USE_QUIP       := 0.9.10
#USE_DEEPMD     := 2.2.7
#USE_SIRIUS     := 7.5.2
USE_SPGLIB     := 1.16.2
# Only needed for SIRIUS
#LIBVDWXC_VER   := 0.4.0
#SPFFT_VER      := 1.0.6
#SPLA_VER       := 1.5.5
# Only needed for LIBPEXSI
SCOTCH_VER     := 6.0.0
SUPERLU_VER    := 6.1.0

# LMAX is part of the LIBINT installation directory name,
# MAX_CONTR is passed to the preprocessor as -D__MAX_CONTR
LMAX           := 5
MAX_CONTR      := 4

# Not referenced elsewhere in this file; presumably read by the including
# makefile - TODO confirm
GPUVER         := P100
OFFLOAD_TARGET := cuda

# Commands used for compiling (C, C++, CUDA, Fortran), linking and archiving
AR             := ar -r
CC             := cc
CXX            := CC
FC             := ftn
LD             := ftn
OFFLOAD_CC     := nvcc

# Root directory of the toolchain installation; all package paths are built from it.
# NOTE(review): $(PWD) comes from the environment, presumably on purpose - confirm
# before replacing it by $(CURDIR)
INSTALL_PATH   := $(PWD)/tools/toolchain/install

# No -march flag is set here because cc, CC, and ftn already supply the proper one
CFLAGS         := -O2 -fopenmp -fopenmp-simd
CFLAGS         += -ftree-vectorize -funroll-loops -g

# Preprocessor defines which are always active
DFLAGS         := -D__parallel -D__SCALAPACK -D__FFTW3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# Optional extra check, requested with DO_CHECKS=yes
ifeq ($(DO_CHECKS), yes)
   DFLAGS         += -D__CHECK_DIAG
endif

# Defines for the accelerated (CUDA) build
ifeq ($(USE_ACC), yes)
   DFLAGS         += -D__DBCSR_ACC -D__OFFLOAD_CUDA
   # PW offload stays disabled: possibly no performance gain with PW_CUDA currently
   DFLAGS         += -D__NO_OFFLOAD_PW
endif

# PLUMED: adds -D__PLUMED2 and links the static library libplumed.a
ifneq ($(USE_PLUMED),)
   USE_PLUMED     := $(strip $(USE_PLUMED))
   PLUMED_LIB     := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   DFLAGS         += -D__PLUMED2
   # Setting USE_GSL activates the "ifneq ($(USE_GSL),)" section further down
   USE_GSL        := 2.7
   LIBS           += $(PLUMED_LIB)/libplumed.a
endif

# ELPA: include paths, -D__ELPA and the static library libelpa.a
ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
   # TARGET names the sub-directory of the ELPA installation which is used.
   # NOTE(review): it is set to nvidia unconditionally, i.e. independent of
   # USE_ACC, so the ifeq below is always true - confirm that this is intended
   TARGET         := nvidia
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   DFLAGS         += -D__ELPA
   ifeq ($(TARGET), nvidia)
      DFLAGS         += -D__ELPA_NVIDIA_GPU
   endif
   LIBS           += $(ELPA_LIB)/libelpa.a
endif

# QUIP: inactive as long as USE_QUIP is commented out in the options section.
# Adds the include path, -D__QUIP and the QUIP and FoX static libraries.
ifneq ($(USE_QUIP),)
   USE_QUIP       := $(strip $(USE_QUIP))
   QUIP_INC       := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   QUIP_LIB       := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   CFLAGS         += -I$(QUIP_INC)
   DFLAGS         += -D__QUIP
   LIBS           += $(QUIP_LIB)/libquip_core.a
   LIBS           += $(QUIP_LIB)/libatoms.a
   LIBS           += $(QUIP_LIB)/libFoX_sax.a
   LIBS           += $(QUIP_LIB)/libFoX_common.a
   LIBS           += $(QUIP_LIB)/libFoX_utils.a
   LIBS           += $(QUIP_LIB)/libFoX_fsys.a
endif

# DeePMD: inactive as long as USE_DEEPMD is commented out in the options section.
# Unlike the other packages in this file, shared libraries (.so) are linked here.
ifneq ($(USE_DEEPMD),)
   USE_DEEPMD       := $(strip $(USE_DEEPMD))
   DEEPMD_INC       := $(INSTALL_PATH)/libdeepmd_c-$(USE_DEEPMD)/include
   DEEPMD_LIB       := $(INSTALL_PATH)/libdeepmd_c-$(USE_DEEPMD)/lib
   CFLAGS         += -I$(DEEPMD_INC)
   DFLAGS         += -D__DEEPMD
   LIBS           += $(DEEPMD_LIB)/libdeepmd.so
   LIBS           += $(DEEPMD_LIB)/libdeepmd_c.so
   LIBS           += $(DEEPMD_LIB)/libdeepmd_cc.so
   LIBS           += $(DEEPMD_LIB)/libdeepmd_dyn_cudart.so
   LIBS           += $(DEEPMD_LIB)/libdeepmd_op.so
   LIBS           += $(DEEPMD_LIB)/libdeepmd_op_cuda.so
   LIBS           += $(DEEPMD_LIB)/libtensorflow_cc.so.2
   LIBS           += $(DEEPMD_LIB)/libtensorflow_framework.so.2
endif

# PEXSI: needs SCOTCH_VER and SUPERLU_VER from the options section in addition
# to its own version, since the SCOTCH and SuperLU_DIST libraries are linked too.
# NOTE(review): the order of the static libraries presumably matters for the
# linker - do not reorder without testing
ifneq ($(USE_LIBPEXSI),)
   USE_LIBPEXSI   := $(strip $(USE_LIBPEXSI))
   SCOTCH_VER     := $(strip $(SCOTCH_VER))
   SUPERLU_VER    := $(strip $(SUPERLU_VER))
   LIBPEXSI_INC   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   LIBPEXSI_LIB   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
   SCOTCH_INC     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   SCOTCH_LIB     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib
   SUPERLU_INC    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
   SUPERLU_LIB    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib
   CFLAGS         += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC)
   DFLAGS         += -D__LIBPEXSI
   LIBS           += $(LIBPEXSI_LIB)/libpexsi.a
   LIBS           += $(SUPERLU_LIB)/libsuperlu_dist.a
   LIBS           += $(SCOTCH_LIB)/libptscotchparmetis.a
   LIBS           += $(SCOTCH_LIB)/libptscotch.a
   LIBS           += $(SCOTCH_LIB)/libptscotcherr.a
   LIBS           += $(SCOTCH_LIB)/libscotchmetis.a
   LIBS           += $(SCOTCH_LIB)/libscotch.a
endif

# LIBVORI: adds -D__LIBVORI and the static library libvori.a (no include path needed here)
ifneq ($(USE_LIBVORI),)
   USE_LIBVORI    := $(strip $(USE_LIBVORI))
   LIBVORI_LIB    := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   DFLAGS         += -D__LIBVORI
   LIBS           += $(LIBVORI_LIB)/libvori.a
endif

# LIBXC: include path, -D__LIBXC and the static libraries libxcf03.a and libxc.a
ifneq ($(USE_LIBXC),)
   USE_LIBXC      := $(strip $(USE_LIBXC))
   LIBXC_INC      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   LIBXC_LIB      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   CFLAGS         += -I$(LIBXC_INC)
   DFLAGS         += -D__LIBXC
   # libxcf03.a is listed before libxc.a
   # (presumably because it depends on libxc.a - keep this order)
   LIBS           += $(LIBXC_LIB)/libxcf03.a
   LIBS           += $(LIBXC_LIB)/libxc.a
endif

# LIBGRPP: include path, -D__LIBGRPP and the static library liblibgrpp.a.
# The installation directory carries the prefix "libgrpp-main-" before the version.
ifneq ($(USE_LIBGRPP),)
   USE_LIBGRPP    := $(strip $(USE_LIBGRPP))
   LIBGRPP_INC    := $(INSTALL_PATH)/libgrpp-main-$(USE_LIBGRPP)/include
   LIBGRPP_LIB    := $(INSTALL_PATH)/libgrpp-main-$(USE_LIBGRPP)/lib
   CFLAGS         += -I$(LIBGRPP_INC)
   DFLAGS         += -D__LIBGRPP
   LIBS           += $(LIBGRPP_LIB)/liblibgrpp.a
endif

# LIBINT: include path, -D__LIBINT and the static library libint2.a.
# The installation directory name contains both the version and LMAX.
ifneq ($(USE_LIBINT),)
   USE_LIBINT     := $(strip $(USE_LIBINT))
   LMAX           := $(strip $(LMAX))
   LIBINT_INC     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   LIBINT_LIB     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   CFLAGS         += -I$(LIBINT_INC)
   DFLAGS         += -D__LIBINT
   LIBS           += $(LIBINT_LIB)/libint2.a
endif

# SPGLIB: include path, -D__SPGLIB and the static library libsymspg.a
ifneq ($(USE_SPGLIB),)
   USE_SPGLIB     := $(strip $(USE_SPGLIB))
   SPGLIB_INC     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   SPGLIB_LIB     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   CFLAGS         += -I$(SPGLIB_INC)
   DFLAGS         += -D__SPGLIB
   LIBS           += $(SPGLIB_LIB)/libsymspg.a
endif

# LIBXSMM: include path, -D__LIBXSMM and the static libraries libxsmmf.a and libxsmm.a
ifneq ($(USE_LIBXSMM),)
   USE_LIBXSMM    := $(strip $(USE_LIBXSMM))
   LIBXSMM_INC    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   LIBXSMM_LIB    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   CFLAGS         += -I$(LIBXSMM_INC)
   DFLAGS         += -D__LIBXSMM
   # libxsmmf.a is listed before libxsmm.a
   # (presumably because it depends on libxsmm.a - keep this order)
   LIBS           += $(LIBXSMM_LIB)/libxsmmf.a
   LIBS           += $(LIBXSMM_LIB)/libxsmm.a
endif

# SIRIUS: inactive as long as USE_SIRIUS is commented out in the options section.
# When activated, LIBVDWXC_VER, SPFFT_VER and SPLA_VER (also commented out there)
# have to be set as well, because their values enter the paths below.
ifneq ($(USE_SIRIUS),)
   USE_SIRIUS     := $(strip $(USE_SIRIUS))
   LIBVDWXC_VER   := $(strip $(LIBVDWXC_VER))
   LIBVDWXC_INC   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SPFFT_VER      := $(strip $(SPFFT_VER))
   SPFFT_INC      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
   SPLA_VER       := $(strip $(SPLA_VER))
   SPLA_INC       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla
   # With USE_ACC = yes the cuda sub-directories are used and -D__OFFLOAD_GEMM is added
   ifeq ($(USE_ACC), yes)
      DFLAGS         += -D__OFFLOAD_GEMM
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib/cuda
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib/cuda
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/cuda
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib/cuda
   else
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
   endif
   CFLAGS         += -I$(LIBVDWXC_INC)
   CFLAGS         += -I$(SPFFT_INC)
   CFLAGS         += -I$(SPLA_INC)
   CFLAGS         += -I$(SIRIUS_INC)
   DFLAGS         += -D__LIBVDWXC
   DFLAGS         += -D__SPFFT
   DFLAGS         += -D__SPLA
   DFLAGS         += -D__SIRIUS
   # NOTE(review): libsirius.a is listed before the libraries it presumably
   # depends on - do not reorder without testing
   LIBS           += $(SIRIUS_LIB)/libsirius.a
   LIBS           += $(SPLA_LIB)/libspla.a
   LIBS           += $(SPFFT_LIB)/libspfft.a
   LIBS           += $(LIBVDWXC_LIB)/libvdwxc.a
endif

# HDF5: include path, -D__HDF5 and the static Fortran, high-level and core libraries
ifneq ($(USE_HDF5),)
   USE_HDF5       := $(strip $(USE_HDF5))
   HDF5_INC       := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/include
   HDF5_LIB       := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/lib
   CFLAGS         += -I$(HDF5_INC)
   DFLAGS         += -D__HDF5
   # libhdf5.a comes last
   # (presumably because the two libraries before depend on it - keep this order)
   LIBS           += $(HDF5_LIB)/libhdf5_fortran.a
   LIBS           += $(HDF5_LIB)/libhdf5_hl.a
   LIBS           += $(HDF5_LIB)/libhdf5.a
endif

# COSMA: include path, -D__COSMA and the COSMA, COSTA and Tiled-MM static libraries
ifneq ($(USE_COSMA),)
   USE_COSMA      := $(strip $(USE_COSMA))
   # With USE_ACC = yes the suffix -cuda is appended to the version, so the
   # paths below point to the directory COSMA-<version>-cuda
   ifeq ($(USE_ACC), yes)
      USE_COSMA      := $(USE_COSMA)-cuda
   endif
   COSMA_INC      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   COSMA_LIB      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   CFLAGS         += -I$(COSMA_INC)
   DFLAGS         += -D__COSMA
   LIBS           += $(COSMA_LIB)/libcosma_prefixed_pxgemm.a
   LIBS           += $(COSMA_LIB)/libcosma.a
   LIBS           += $(COSMA_LIB)/libcosta.a
   LIBS           += $(COSMA_LIB)/libTiled-MM.a
endif

# GSL: include path, -D__GSL and the static library libgsl.a.
# USE_GSL is not part of the options section; within this file it is only
# set inside the "ifneq ($(USE_PLUMED),)" section.
ifneq ($(USE_GSL),)
   USE_GSL        := $(strip $(USE_GSL))
   GSL_INC        := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   GSL_LIB        := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   CFLAGS         += -I$(GSL_INC)
   DFLAGS         += -D__GSL
   LIBS           += $(GSL_LIB)/libgsl.a
endif

# The collected preprocessor defines are passed to the C compiler as well
CFLAGS         += $(DFLAGS)

# C++ flags extend the C flags; the offload compiler only gets the defines.
# NOTE(review): -arch sm_60 presumably corresponds to GPUVER = P100 - keep in sync
CXXFLAGS       := $(CFLAGS) -std=c++11
OFFLOAD_FLAGS  := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# Fortran flags extend the C flags; -fallow-argument-mismatch is only added
# if the major version reported by "gcc -dumpversion" is greater than 9
FCFLAGS        := $(CFLAGS)
ifeq ($(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes), yes)
   FCFLAGS        += -fallow-argument-mismatch
endif
FCFLAGS        += -fbacktrace -ffree-form -ffree-line-length-none
FCFLAGS        += -fno-omit-frame-pointer -std=f2008

# Linker flags are the Fortran flags, plus the CUDA library path if CUDA_HOME is set
LDFLAGS        := $(FCFLAGS)
ifneq ($(CUDA_HOME),)
   CUDA_LIB       := $(CUDA_HOME)/lib64
   LDFLAGS        += -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA and system libraries are appended after all package libraries
LIBS           += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/01
 job id: 51775559
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/02
 job id: 51775561
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/03
 job id: 51775562
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/04
 job id: 51775563
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/05
 job id: 51775564
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/06
 job id: 51775565
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/07
 job id: 51775566
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/08
 job id: 51775567
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/09
 job id: 51775568
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/10
 job id: 51775569
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/11
 job id: 51775570
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/12
 job id: 51775571
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/13
 job id: 51775572
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/14
 job id: 51775573
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/15
 job id: 51775577
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/16
 job id: 51775579
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/17
 job id: 51775583
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/18
 job id: 51775586
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/19
 job id: 51775587
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/20
 job id: 51775589
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/21
 job id: 51775590
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/22
 job id: 51775592
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/23
 job id: 51775594
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/24
 job id: 51775595
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/25
 job id: 51775596
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/26
 job id: 51775597
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: 512 H2O (4 NVE MD steps on 64 nodes)
 input file: benchmarks/QS/00512_H2O/H2O-512_md.inp
 required files: []
 output file: result.log
 # nodes = 64
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 20
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/27
 job id: 51775599
 --- Point ---
 name: 601
 plot: h2o_512_md
 regex: CP2K  
 label: (64n/12r/1t)
 --- Point ---
 name: 602
 plot: h2o_512_md_mem
 regex: Estimated peak process memory 
 label: (64n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md_mem", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 172669.
 MP_Allreduce          424                      8.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.035  139.180  139.181
 farming_run                          1  2.0  138.321  138.339  139.151  139.154
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.538925E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              230                1134128.
 MP_Allreduce          571                1938539.
 MP_Sync                25
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split          10
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.005    0.023  119.581  119.582
 qs_energies                          1  2.0    0.000    0.000  119.347  119.350
 mp2_main                             1  3.0    0.000    0.000  117.231  117.233
 mp2_gpw_main                         1  4.0    0.031    0.044  116.202  116.205
 mp2_ri_gpw_compute_in                1  5.0    0.179    0.196   97.127   97.459
 mp2_ri_gpw_compute_in_loop           1  6.0    0.003    0.004   57.312   57.643
 mp2_eri_3c_integrate_gpw           272  7.0    0.150    0.164   43.760   48.838
 get_2c_integrals                     1  6.0    0.008    0.009   39.032   39.640
 integrate_v_rspace                 273  8.0    0.439    0.459   25.519   30.104
 fft_wrap_pw1pw2                   5465 10.4    0.066    0.069   28.113   28.720
 grid_integrate_task_list           273  9.0   20.763   26.292   20.763   26.292
 fft_wrap_pw1pw2_100               2178 11.4    1.312    1.447   25.559   26.193
 compute_2c_integrals                 1  7.0    0.002    0.002   21.454   21.458
 compute_2c_integrals_loop_lm         1  8.0    0.002    0.003   20.838   21.165
 mp2_eri_2c_integrate_gpw             1  9.0    2.391    2.426   20.836   21.162
 rpa_ri_compute_en                    1  5.0    0.019    0.022   18.952   19.057
 cp_fm_cholesky_decompose            12  8.2   17.527   18.101   17.527   18.101
 fft3d_s                           5443 12.4   16.872   17.303   16.893   17.322
 cholesky_decomp                      1  7.0    0.000    0.000   16.416   16.989
 ao_to_mo_and_store_B_mult_1        272  7.0   10.766   15.314   10.766   15.314
 calculate_wavefunction             272  8.0    5.394    5.535   12.902   13.800
 calc_potential_gpw                 544  9.5    0.004    0.004   10.823   11.661
 rpa_num_int                          1  6.0    0.001    0.018   10.684   10.694
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.632   10.663
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.001    9.793   10.553
 potential_pw2rs                    545 10.0    0.106    0.108    8.523   10.351
 calc_mat_Q                           8  8.0    0.000    0.000    9.474    9.562
 contract_S_to_Q                      8  9.0    0.000    0.000    8.899    8.985
 collocate_single_gaussian          272 10.0    0.038    0.040    7.848    8.644
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.500    8.588
 parallel_gemm_fm_cosma              14 10.1    8.500    8.588    8.500    8.588
 create_integ_mat                     1  6.0    0.014    0.028    7.844    7.854
 array2fm                             1  7.0    0.000    0.000    6.741    7.301
 pw_gather_s                       2722 12.2    4.005    4.903    4.005    4.903
 pw_poisson_solve                   545 10.5    0.010    0.011    4.664    4.887
 pw_scatter_s                      2720 12.7    4.439    4.750    4.439    4.750
 pw_poisson_set                     548 11.5    0.018    0.018    3.339    3.560
 array2fm_buffer_send                 1  8.0    2.987    3.124    2.987    3.124
 pw_copy                           4911 11.6    2.618    2.763    2.618    2.763
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=116.201891, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2787.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 200775.
 MP_Allreduce          424                      9.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.067    0.304  536.348  536.351
 farming_run                          1  2.0  533.091  533.252  536.064  536.134
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827128576       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788821       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.274089E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              757                 478553.
 MP_Allreduce         2021                  21391.
 MP_Sync                37
 MP_Alltoall            77                9782991.
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.034  325.462  325.462
 qs_energies                          1  2.0    0.003    0.026  325.132  325.132
 mp2_main                             1  3.0    0.012    0.098  210.001  210.005
 mp2_gpw_main                         1  4.0    0.107    0.346  209.051  209.063
 mp2_ri_gpw_compute_en                1  5.0    0.037    0.041  128.371  130.455
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.739    1.906  126.733  126.739
 scf_env_do_scf                       1  3.0    0.000    0.000  114.842  114.842
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  113.956  113.962
 rebuild_ks_matrix                    4  6.0    0.000    0.000  113.955  113.961
 qs_ks_build_kohn_sham_matrix         4  7.0    0.054    0.060  113.955  113.961
 hfx_ks_matrix                        4  8.0    0.001    0.001  113.604  113.607
 integrate_four_center                4  9.0    0.153    0.472  113.603  113.606
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.464    0.497  111.022  112.033
 local_gemm                         172  8.0  110.558  111.564  110.558  111.564
 integrate_four_center_main           4 10.0    0.096    0.538  102.035  105.118
 integrate_four_center_bin          268 11.0  101.940  104.948  101.940  104.948
 init_scf_loop                        1  4.0    0.000    0.000   96.678   96.678
 mp2_ri_gpw_compute_in                1  5.0    0.066    0.077   80.143   81.171
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.002   56.758   57.763
 mp2_eri_3c_integrate_gpw            91  7.0    0.147    0.160   44.531   49.233
 integrate_v_rspace                  95  8.0    0.397    0.568   28.165   32.659
 fft_wrap_pw1pw2                   1868 10.4    0.028    0.034   30.553   31.155
 fft_wrap_pw1pw2_100                730 11.4    0.645    0.719   28.255   28.888
 grid_integrate_task_list            95  9.0   23.456   28.211   23.456   28.211
 ao_to_mo_and_store_B_mult_1         91  7.0   10.559   26.982   10.559   26.982
 get_2c_integrals                     1  6.0    0.000    0.000   23.295   23.341
 compute_2c_integrals                 1  7.0    0.002    0.003   22.275   22.303
 compute_2c_integrals_loop_lm         1  8.0    0.003    0.018   21.820   22.163
 mp2_eri_2c_integrate_gpw             1  9.0    1.732    1.894   21.817   22.145
 fft3d_s                           1823 12.4   19.242   19.879   19.256   19.892
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.001   18.162   18.162
 calc_potential_gpw                 182  9.5    0.002    0.002   12.365   13.284
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.001   10.753   11.379
 calculate_wavefunction              91  8.0    2.020    2.059   10.044   10.784
 potential_pw2rs                    186 10.0    0.033    0.035    9.094   10.515
 collocate_single_gaussian           91 10.0    0.025    0.155    8.347    9.204
 mp2_ri_gpw_compute_en_comm          22  7.0    0.502    0.528    7.867    9.056
 mp_sendrecv_dm3                   2068  8.0    5.895    7.053    5.895    7.053
 integrate_four_center_load           4 10.0    0.000    0.000    6.750    6.754
 hfx_load_balance                     1 11.0    0.000    0.000    6.750    6.754
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=209.052950, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1527.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             445.521920E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62379.
 MP_Allreduce        10329                    270.
 MP_Sync               530
 MP_Alltoall          2083                 592243.
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.027    0.060   62.783   62.785
 qs_mol_dyn_low                       1  2.0    0.023    0.063   62.152   62.204
 qs_forces                           11  3.9    0.002    0.003   56.622   56.644
 qs_energies                         11  4.9    0.003    0.005   55.125   55.157
 scf_env_do_scf                      11  5.9    0.000    0.001   48.478   48.478
 scf_env_do_scf_inner_loop          108  6.5    0.015    0.044   46.058   46.059
 qs_scf_new_mos                     108  7.5    0.000    0.001   35.345   35.636
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   35.344   35.636
 dbcsr_multiply_generic            2286 12.5    0.095    0.099   34.846   35.368
 ot_scf_mini                        108  9.5    0.002    0.002   33.638   33.820
 velocity_verlet                     10  3.0    0.047    0.137   32.761   32.824
 multiply_cannon                   2286 13.5    0.188    0.195   26.699   28.132
 multiply_cannon_loop              2286 14.5    1.801    1.905   25.858   27.310
 ot_mini                            108 10.5    0.001    0.001   20.496   20.751
 qs_ot_get_derivative               108 11.5    0.001    0.001   17.451   17.639
 mp_waitall_1                    245248 16.5    9.080   15.302    9.080   15.302
 multiply_cannon_metrocomm3       54864 15.5    0.072    0.078    6.222   13.158
 multiply_cannon_multrec          54864 15.5    3.646    5.733    7.656   11.049
 qs_ot_get_p                        119 10.4    0.001    0.002    8.391    8.692
 rebuild_ks_matrix                  119  8.3    0.000    0.000    8.366    8.510
 qs_ks_build_kohn_sham_matrix       119  9.3    0.021    0.100    8.366    8.510
 qs_ks_update_qs_env                119  7.6    0.001    0.001    7.406    7.537
 mp_sum_l                          7287 12.8    5.808    7.508    5.808    7.508
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    5.982    6.425
 multiply_cannon_sync_h2d         54864 15.5    5.140    6.080    5.140    6.080
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    5.882    5.998
 dbcsr_mm_accdrv_process          76910 16.1    1.814    2.861    3.925    5.669
 mp_sum_dm                          438  4.9    5.416    5.549    5.416    5.549
 update_particle_set                 20  4.0    0.000    0.000    5.365    5.466
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    5.351    5.411
 md_output                           10  3.0    0.000    0.000    0.062    5.400
 md_write_output                     11  3.9    0.009    0.168    0.070    5.364
 write_trajectory                    44  4.9    0.002    0.009    0.056    5.195
 write_particle_coordinates          11  5.9    0.054    5.189    0.054    5.189
 init_scf_run                        11  5.9    0.000    0.001    5.162    5.163
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.162    5.162
 sum_up_and_integrate               119 10.3    0.008    0.053    4.817    4.828
 integrate_v_rspace                 119 11.3    0.002    0.002    4.799    4.819
 qs_rho_update_rho_low              119  7.7    0.000    0.001    4.505    4.655
 calculate_rho_elec                 119  8.7    0.012    0.026    4.504    4.655
 cp_dbcsr_syevd                      50 12.0    0.002    0.003    4.519    4.519
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    4.250    4.251
 cp_fm_redistribute_end              50 14.0    2.168    4.220    2.175    4.223
 cp_fm_diag_elpa_base                50 14.0    2.040    4.108    2.044    4.115
 multiply_cannon_metrocomm1       54864 15.5    0.055    0.060    1.924    3.136
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.940    3.135
 apply_single                       119 13.6    0.000    0.000    2.940    3.135
 calculate_dm_sparse                119  9.5    0.000    0.000    2.942    3.106
 calculate_first_density_matrix       1  7.0    0.001    0.009    2.815    2.820
 ot_diis_step                       108 11.5    0.006    0.006    2.768    2.768
 jit_kernel_multiply                 13 15.8    2.048    2.737    2.048    2.737
 acc_transpose_blocks             54864 15.5    0.215    0.234    2.200    2.735
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.503    2.588
 density_rs2pw                      119  9.7    0.003    0.004    2.451    2.540
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.487    2.490
 init_scf_loop                       11  6.9    0.000    0.000    2.394    2.394
 wfi_extrapolate                     11  7.9    0.002    0.013    2.249    2.249
 potential_pw2rs                    119 12.3    0.004    0.004    2.162    2.204
 make_m2s                          4572 13.5    0.053    0.055    2.060    2.137
 grid_integrate_task_list           119 12.3    2.011    2.115    2.011    2.115
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.006    2.057
 make_images                       4572 14.5    0.132    0.139    1.978    2.054
 fft_wrap_pw1pw2                   1201 11.6    0.011    0.013    1.904    2.005
 mp_sum_d                          4139 12.0    1.368    1.922    1.368    1.922
 fft3d_ps                          1201 13.6    0.366    0.472    1.681    1.778
 mp_alltoall_d11v                  2130 13.8    1.522    1.741    1.522    1.741
 transfer_rs2pw                     487 10.6    0.006    0.006    1.621    1.733
 transfer_pw2rs                     487 13.2    0.005    0.006    1.538    1.562
 fft_wrap_pw1pw2_140                487 12.2    0.047    0.052    1.457    1.557
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.406    1.439
 grid_collocate_task_list           119  9.7    1.354    1.424    1.354    1.424
 mp_waitany                       12084 13.8    1.294    1.423    1.294    1.423
 acc_transpose_blocks_sync       164592 16.5    1.185    1.421    1.185    1.421
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=62.785000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=424.272727, yerr=1.052349
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             484.274176E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10306                    303.
 MP_Sync                54
 MP_Alltoall          2060                 934262.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.021    0.052   42.221   42.221
 qs_mol_dyn_low                       1  2.0    0.003    0.003   41.793   41.858
 qs_forces                           11  3.9    0.003    0.004   41.706   41.706
 qs_energies                         11  4.9    0.002    0.003   39.973   39.977
 scf_env_do_scf                      11  5.9    0.001    0.001   33.725   33.725
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   30.960   30.960
 dbcsr_multiply_generic            2286 12.5    0.119    0.123   23.179   23.548
 qs_scf_new_mos                     108  7.5    0.001    0.001   21.398   21.620
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   21.397   21.619
 ot_scf_mini                        108  9.5    0.002    0.003   20.447   20.608
 velocity_verlet                     10  3.0    0.011    0.029   19.699   19.700
 multiply_cannon                   2286 13.5    0.210    0.220   17.539   19.107
 multiply_cannon_loop              2286 14.5    1.183    1.252   16.290   17.766
 ot_mini                            108 10.5    0.001    0.001   12.578   12.806
 mp_waitall_1                    200699 16.5    6.280   11.642    6.280   11.642
 qs_ot_get_derivative               108 11.5    0.001    0.001   10.195   10.358
 multiply_cannon_metrocomm3       27432 15.5    0.071    0.073    4.221    9.908
 multiply_cannon_multrec          27432 15.5    1.838    4.137    6.479    9.491
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.349    7.499
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.018    7.349    7.499
 dbcsr_mm_accdrv_process          47894 16.0    3.647    6.345    4.560    6.814
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.479    6.615
 qs_ot_get_p                        119 10.4    0.001    0.001    4.862    5.085
 init_scf_run                        11  5.9    0.000    0.001    4.952    4.952
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    4.952    4.952
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.893    4.739
 sum_up_and_integrate               119 10.3    0.001    0.001    4.187    4.192
 integrate_v_rspace                 119 11.3    0.002    0.003    4.173    4.178
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.136    4.174
 calculate_rho_elec                 119  8.7    0.021    0.024    4.135    4.173
 mp_sum_l                          7287 12.8    2.080    4.162    2.080    4.162
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.021    4.097
 apply_single                       119 13.6    0.000    0.000    3.021    4.097
 make_m2s                          4572 13.5    0.052    0.053    3.102    3.406
 make_images                       4572 14.5    0.206    0.247    3.012    3.318
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.270    3.273
 qs_ot_p2m_diag                      50 11.0    0.009    0.013    3.079    3.097
 init_scf_loop                       11  6.9    0.000    0.000    2.737    2.737
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.661    2.662
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.618    2.621
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.459    2.542
 jit_kernel_multiply                 11 16.2    0.854    2.537    0.854    2.537
 density_rs2pw                      119  9.7    0.003    0.004    2.428    2.516
 calculate_dm_sparse                119  9.5    0.000    0.000    2.290    2.373
 ot_diis_step                       108 11.5    0.011    0.011    2.333    2.333
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.275    2.276
 multiply_cannon_sync_h2d         27432 15.5    1.701    2.270    1.701    2.270
 cp_fm_redistribute_end              50 14.0    1.153    2.246    1.156    2.249
 cp_fm_diag_elpa_base                50 14.0    1.058    2.151    1.087    2.185
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.090    2.131
 acc_transpose_blocks             27432 15.5    0.112    0.118    1.702    2.090
 potential_pw2rs                    119 12.3    0.006    0.006    1.930    1.952
 grid_integrate_task_list           119 12.3    1.834    1.949    1.834    1.949
 fft_wrap_pw1pw2                   1201 11.6    0.012    0.014    1.822    1.869
 prepare_preconditioner              11  7.9    0.000    0.000    1.788    1.814
 make_preconditioner                 11  8.9    0.000    0.000    1.788    1.814
 make_images_data                  4572 15.5    0.047    0.053    1.352    1.789
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.679    1.742
 transfer_rs2pw                     487 10.6    0.005    0.006    1.591    1.672
 wfi_extrapolate                     11  7.9    0.001    0.001    1.616    1.616
 fft3d_ps                          1201 13.6    0.513    0.567    1.542    1.584
 hybrid_alltoall_any               4725 16.4    0.053    0.115    1.180    1.570
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.458    1.468
 mp_alltoall_d11v                  2130 13.8    1.329    1.448    1.329    1.448
 transfer_pw2rs                     487 13.2    0.004    0.005    1.383    1.405
 fft_wrap_pw1pw2_140                487 12.2    0.047    0.051    1.346    1.397
 grid_collocate_task_list           119  9.7    1.290    1.348    1.290    1.348
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.254    1.302
 make_images_sizes                 4572 15.5    0.005    0.005    0.802    1.121
 mp_alltoall_i44                   4572 16.5    0.797    1.116    0.797    1.116
 mp_allgather_i34                  2286 14.5    0.670    1.092    0.670    1.092
 mp_sum_d                          4139 12.0    0.567    1.042    0.567    1.042
 dbcsr_complete_redistribute        329 12.2    0.196    0.380    0.944    1.016
 acc_transpose_blocks_kernels     27432 16.5    0.187    0.274    0.755    1.012
 qs_energies_init_hamiltonians       11  5.9    0.037    0.061    0.990    0.991
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.927    0.940
 acc_transpose_blocks_sync        82296 16.5    0.810    0.938    0.810    0.938
 mp_alltoall_z22v                  1201 15.6    0.769    0.855    0.769    0.855
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=42.221000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=461.363636, yerr=1.067940
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             515.334144E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63490.
 MP_Allreduce        10155                    305.
 MP_Sync                54
 MP_Alltoall          1821                1607811.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.113    0.274   43.078   43.103
 qs_mol_dyn_low                       1  2.0    0.028    0.116   40.861   40.869
 qs_forces                           11  3.9    0.004    0.015   39.697   39.716
 qs_energies                         11  4.9    0.022    0.035   38.052   38.079
 scf_env_do_scf                      11  5.9    0.000    0.001   30.732   30.732
 scf_env_do_scf_inner_loop          108  6.5    0.015    0.069   27.508   27.508
 dbcsr_multiply_generic            2286 12.5    0.119    0.123   20.061   20.293
 velocity_verlet                     10  3.0    0.031    0.107   19.032   19.075
 qs_scf_new_mos                     108  7.5    0.001    0.001   18.491   18.525
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   18.491   18.525
 ot_scf_mini                        108  9.5    0.002    0.002   17.633   17.655
 multiply_cannon                   2286 13.5    0.194    0.197   15.477   16.554
 multiply_cannon_loop              2286 14.5    0.857    0.894   14.489   15.663
 ot_mini                            108 10.5    0.001    0.001   10.913   10.952
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.286    9.310
 multiply_cannon_multrec          18288 15.5    1.854    2.835    7.288    7.720
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.614    6.654
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.014    6.613    6.654
 dbcsr_mm_accdrv_process          38222 16.0    5.287    6.202    5.344    6.266
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.829    5.865
 mp_waitall_1                    158411 16.6    4.033    5.536    4.033    5.536
 init_scf_run                        11  5.9    0.000    0.001    4.993    4.993
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    4.993    4.993
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.732    4.501
 qs_ot_get_p                        119 10.4    0.001    0.002    4.243    4.280
 sum_up_and_integrate               119 10.3    0.001    0.001    4.094    4.099
 integrate_v_rspace                 119 11.3    0.003    0.003    4.081    4.088
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.838    3.854
 calculate_rho_elec                 119  8.7    0.031    0.032    3.838    3.854
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.560    3.561
 multiply_cannon_metrocomm3       18288 15.5    0.047    0.049    2.124    3.416
 init_scf_loop                       11  6.9    0.000    0.000    3.196    3.196
 qs_ot_p2m_diag                      50 11.0    0.012    0.013    2.779    2.787
 calculate_dm_sparse                119  9.5    0.000    0.001    2.612    2.632
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.218    2.626
 apply_single                       119 13.6    0.000    0.000    2.218    2.626
 mp_sum_l                          7287 12.8    1.937    2.518    1.937    2.518
 make_m2s                          4572 13.5    0.044    0.045    2.370    2.511
 density_rs2pw                      119  9.7    0.003    0.003    2.372    2.485
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.446    2.447
 make_images                       4572 14.5    0.193    0.205    2.284    2.426
 acc_transpose_blocks             18288 15.5    0.077    0.080    2.259    2.370
 prepare_preconditioner              11  7.9    0.000    0.000    2.362    2.367
 make_preconditioner                 11  8.9    0.000    0.000    2.362    2.367
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.209    2.295
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.086    2.097
 cp_fm_diag_elpa_base                50 14.0    2.059    2.072    2.084    2.094
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.065    2.073
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.057    2.063
 grid_integrate_task_list           119 12.3    1.793    1.893    1.793    1.893
 potential_pw2rs                    119 12.3    0.007    0.007    1.870    1.881
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.864    1.874
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.852    1.867
 qs_energies_init_hamiltonians       11  5.9    0.030    0.052    1.832    1.854
 fft_wrap_pw1pw2                   1201 11.6    0.012    0.013    1.808    1.832
 transfer_rs2pw                     487 10.6    0.005    0.005    1.638    1.782
 ot_diis_step                       108 11.5    0.012    0.012    1.564    1.564
 acc_transpose_blocks_kernels     18288 16.5    0.215    0.222    1.434    1.540
 fft3d_ps                          1201 13.6    0.533    0.553    1.493    1.520
 create_qs_kind_set                   1  2.0    0.000    0.000    1.133    1.386
 read_qs_kind                         2  3.0    0.162    0.484    1.133    1.386
 parser_read_line                  2821  4.0    0.001    0.001    0.971    1.378
 parser_read_line_low                 5  5.0    0.004    0.111    0.970    1.377
 broadcast_input_information          5  6.0    0.002    0.003    0.966    1.377
 wfi_extrapolate                     11  7.9    0.001    0.001    1.374    1.374
 jit_kernel_transpose                 5 15.6    1.219    1.328    1.219    1.328
 transfer_pw2rs                     487 13.2    0.004    0.004    1.315    1.328
 grid_collocate_task_list           119  9.7    1.234    1.318    1.234    1.318
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.231    1.303
 fft_wrap_pw1pw2_140                487 12.2    0.061    0.063    1.278    1.302
 mp_bcast_i_src                      20  7.0    0.249    1.289    0.249    1.289
 make_images_data                  4572 15.5    0.047    0.051    0.982    1.205
 mp_bcast_am_src                      5  7.0    0.714    1.198    0.715    1.199
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.144    1.190
 multiply_cannon_sync_h2d         18288 15.5    0.960    1.179    0.960    1.179
 mp_sum_dm                          438  4.9    1.072    1.149    1.072    1.149
 cp_fm_cholesky_invert               11 10.9    1.102    1.108    1.102    1.108
 update_particle_set                 20  4.0    0.000    0.000    1.034    1.106
 qs_create_task_list                 11  7.9    0.010    0.048    0.933    1.053
 mp_alltoall_d11v                  2130 13.8    0.875    1.037    0.875    1.037
 generate_qs_task_list               11  8.9    0.151    0.322    0.923    1.013
 hybrid_alltoall_any               4725 16.4    0.058    0.116    0.841    0.986
 parallel_gemm_fm                    81  9.0    0.000    0.000    0.976    0.979
 parallel_gemm_fm_cosma              81 10.0    0.976    0.979    0.976    0.979
 md_write_output                     11  3.9    0.016    0.406    0.054    0.975
 md_output                           10  3.0    0.000    0.000    0.032    0.959
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.857    0.863
 mp_alltoall_z22v                  1201 15.6    0.773    0.862    0.773    0.862
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=43.103000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=490.363636, yerr=2.100767
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             552.898560E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63489.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.041    0.084   40.989   40.994
 qs_mol_dyn_low                       1  2.0    0.029    0.141   40.456   40.488
 qs_forces                           11  3.9    0.002    0.003   39.498   39.530
 qs_energies                         11  4.9    0.015    0.050   37.675   37.699
 scf_env_do_scf                      11  5.9    0.011    0.036   31.539   31.544
 scf_env_do_scf_inner_loop          108  6.5    0.008    0.024   27.502   27.502
 dbcsr_multiply_generic            2286 12.5    0.103    0.106   20.372   20.516
 velocity_verlet                     10  3.0    0.101    0.246   20.227   20.284
 qs_scf_new_mos                     108  7.5    0.001    0.001   18.867   18.930
 qs_scf_loop_do_ot                  108  8.5    0.001    0.003   18.866   18.929
 ot_scf_mini                        108  9.5    0.004    0.014   17.820   17.885
 multiply_cannon                   2286 13.5    0.219    0.226   16.151   16.709
 multiply_cannon_loop              2286 14.5    1.493    1.613   15.117   15.478
 ot_mini                            108 10.5    0.001    0.001   10.634   10.715
 multiply_cannon_multrec          27432 15.5    2.456    3.123    9.023    9.315
 qs_ot_get_derivative               108 11.5    0.001    0.001    8.738    8.805
 dbcsr_mm_accdrv_process          47916 15.9    6.118    7.793    6.465    7.855
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.846    6.899
 qs_ks_build_kohn_sham_matrix       119  9.3    0.035    0.067    6.846    6.898
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.124    6.176
 qs_ot_get_p                        119 10.4    0.002    0.002    4.268    4.340
 init_scf_run                        11  5.9    0.001    0.002    4.327    4.328
 scf_env_initial_rho_setup           11  6.9    0.001    0.003    4.327    4.328
 init_scf_loop                       11  6.9    0.035    0.125    4.004    4.018
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.882    3.924
 calculate_rho_elec                 119  8.7    0.040    0.047    3.882    3.924
 sum_up_and_integrate               119 10.3    0.001    0.001    3.840    3.849
 integrate_v_rspace                 119 11.3    0.002    0.003    3.825    3.838
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.182    3.679
 prepare_preconditioner              11  7.9    0.000    0.000    2.800    2.824
 make_preconditioner                 11  8.9    0.001    0.007    2.800    2.823
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    2.739    2.749
 make_m2s                          4572 13.5    0.054    0.056    2.620    2.744
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.400    2.706
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.211    2.679
 apply_single                       119 13.6    0.000    0.000    2.211    2.679
 mp_waitall_1                    137007 16.6    2.204    2.679    2.204    2.679
 make_images                       4572 14.5    0.274    0.335    2.511    2.634
 calculate_first_density_matrix       1  7.0    0.009    0.036    2.562    2.584
 acc_transpose_blocks             27432 15.5    0.113    0.116    2.398    2.545
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.430    2.458
 density_rs2pw                      119  9.7    0.009    0.026    2.272    2.351
 cp_dbcsr_syevd                      50 12.0    0.007    0.015    2.331    2.332
 calculate_dm_sparse                119  9.5    0.000    0.000    2.208    2.258
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.033    2.045
 cp_fm_diag_elpa_base                50 14.0    1.988    2.003    2.023    2.031
 fft_wrap_pw1pw2                   1201 11.6    0.012    0.014    1.950    1.990
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.951    1.957
 grid_integrate_task_list           119 12.3    1.829    1.910    1.829    1.910
 ot_diis_step                       108 11.5    0.012    0.012    1.855    1.855
 mp_sum_l                          7287 12.8    1.223    1.782    1.223    1.782
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.739    1.752
 transfer_rs2pw                     487 10.6    0.004    0.005    1.544    1.711
 fft3d_ps                          1201 13.6    0.577    0.636    1.583    1.650
 potential_pw2rs                    119 12.3    0.013    0.022    1.630    1.645
 fft_wrap_pw1pw2_140                487 12.2    0.074    0.081    1.556    1.596
 acc_transpose_blocks_sync        82296 16.5    1.383    1.541    1.383    1.541
 qs_energies_init_hamiltonians       11  5.9    0.006    0.007    1.444    1.476
 wfi_extrapolate                     11  7.9    0.001    0.001    1.472    1.472
 grid_collocate_task_list           119  9.7    1.286    1.411    1.286    1.411
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.309    1.327
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.296    1.307
 dbcsr_complete_redistribute        329 12.2    0.119    0.162    0.881    1.184
 make_images_data                  4572 15.5    0.047    0.052    1.006    1.182
 cp_fm_upper_to_full                 72 14.2    0.831    1.180    0.831    1.180
 multiply_cannon_metrocomm3       27432 15.5    0.040    0.042    0.712    1.150
 transfer_pw2rs                     487 13.2    0.004    0.004    1.077    1.093
 jit_kernel_multiply                  6 15.8    0.282    1.085    0.282    1.085
 hybrid_alltoall_any               4725 16.4    0.066    0.155    0.843    1.063
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    0.918    0.996
 mp_waitany                        7280 13.7    0.784    0.969    0.784    0.969
 mp_alltoall_z22v                  1201 15.6    0.890    0.959    0.890    0.959
 mp_alltoall_d11v                  2130 13.8    0.820    0.955    0.820    0.955
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.643    0.939
 transfer_rs2pw_140                 130 11.5    0.108    0.120    0.758    0.935
 mp_sum_dm                          438  4.9    0.763    0.906    0.763    0.906
 acc_transpose_blocks_kernels     27432 16.5    0.273    0.297    0.873    0.905
 build_core_hamiltonian_matrix_      11  4.9    0.014    0.092    0.815    0.888
 cp_fm_cholesky_invert               11 10.9    0.871    0.874    0.871    0.874
 update_particle_set                 20  4.0    0.000    0.000    0.733    0.874
 mp_alltoall_i22                    627 13.8    0.545    0.854    0.545    0.854
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.846    0.850
 mp_sum_d                          4137 12.0    0.636    0.832    0.636    0.832
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=40.994000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=525.818182, yerr=2.854603
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             628.064256E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63488.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.022    0.068   32.800   32.800
 qs_mol_dyn_low                       1  2.0    0.003    0.003   32.397   32.404
 qs_forces                           11  3.9    0.004    0.005   32.070   32.071
 qs_energies                         11  4.9    0.002    0.002   30.316   30.321
 scf_env_do_scf                      11  5.9    0.000    0.001   24.981   24.981
 scf_env_do_scf_inner_loop          108  6.5    0.013    0.018   22.076   22.076
 velocity_verlet                     10  3.0    0.047    0.058   16.543   16.563
 dbcsr_multiply_generic            2286 12.5    0.101    0.104   14.569   14.692
 qs_scf_new_mos                     108  7.5    0.001    0.001   13.771   13.795
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   13.771   13.795
 ot_scf_mini                        108  9.5    0.002    0.002   12.982   13.008
 multiply_cannon                   2286 13.5    0.226    0.236   11.297   11.873
 multiply_cannon_loop              2286 14.5    0.643    0.662   10.203   10.482
 ot_mini                            108 10.5    0.001    0.001    7.466    7.504
 multiply_cannon_multrec           9144 15.5    1.807    1.987    6.273    6.464
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.163    6.194
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.014    6.163    6.193
 qs_ot_get_derivative               108 11.5    0.001    0.001    6.092    6.118
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.496    5.524
 dbcsr_mm_accdrv_process          12550 15.8    3.627    4.380    4.357    4.462
 sum_up_and_integrate               119 10.3    0.001    0.001    3.742    3.746
 integrate_v_rspace                 119 11.3    0.003    0.003    3.731    3.736
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.692    3.707
 calculate_rho_elec                 119  8.7    0.060    0.061    3.692    3.706
 init_scf_run                        11  5.9    0.000    0.001    3.668    3.668
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.667    3.668
 qs_ot_get_p                        119 10.4    0.001    0.002    3.466    3.502
 mp_waitall_1                    115863 16.7    2.164    2.959    2.164    2.959
 init_scf_loop                       11  6.9    0.000    0.000    2.878    2.879
 make_m2s                          4572 13.5    0.034    0.035    2.266    2.457
 make_images                       4572 14.5    0.269    0.300    2.176    2.367
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.336    2.337
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    2.259    2.262
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.151    2.169
 density_rs2pw                      119  9.7    0.003    0.003    2.036    2.116
 prepare_preconditioner              11  7.9    0.000    0.000    2.095    2.101
 make_preconditioner                 11  8.9    0.000    0.000    2.095    2.101
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.961    2.000
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.984    1.985
 fft_wrap_pw1pw2                   1201 11.6    0.012    0.013    1.943    1.956
 grid_integrate_task_list           119 12.3    1.881    1.956    1.881    1.956
 calculate_dm_sparse                119  9.5    0.000    0.000    1.880    1.904
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.691    1.693
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.622    1.630
 cp_fm_diag_elpa_base                50 14.0    1.592    1.612    1.620    1.628
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.589    1.604
 fft3d_ps                          1201 13.6    0.642    0.656    1.556    1.573
 acc_transpose_blocks              9144 15.5    0.040    0.042    1.531    1.572
 jit_kernel_multiply                  8 15.5    0.691    1.564    0.691    1.564
 fft_wrap_pw1pw2_140                487 12.2    0.094    0.095    1.530    1.542
 potential_pw2rs                    119 12.3    0.010    0.010    1.506    1.511
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.433    1.442
 qs_energies_init_hamiltonians       11  5.9    0.068    0.106    1.417    1.420
 grid_collocate_task_list           119  9.7    1.308    1.370    1.308    1.370
 ot_diis_step                       108 11.5    0.013    0.013    1.346    1.346
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.279    1.316
 apply_single                       119 13.6    0.000    0.000    1.278    1.316
 make_images_data                  4572 15.5    0.042    0.046    1.010    1.291
 hybrid_alltoall_any               4725 16.4    0.065    0.176    0.955    1.285
 wfi_extrapolate                     11  7.9    0.001    0.001    1.273    1.273
 transfer_rs2pw                     487 10.6    0.004    0.005    1.125    1.227
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.198    1.206
 cp_fm_cholesky_invert               11 10.9    1.124    1.128    1.124    1.128
 mp_alltoall_d11v                  2130 13.8    0.951    1.039    0.951    1.039
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.935    0.949
 transfer_pw2rs                     487 13.2    0.003    0.003    0.937    0.942
 mp_allgather_i34                  2286 14.5    0.406    0.942    0.406    0.942
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.876    0.928
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.020    0.394    0.841
 mp_alltoall_z22v                  1201 15.6    0.789    0.833    0.789    0.833
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    0.742    0.808
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.791    0.793
 acc_transpose_blocks_kernels      9144 16.5    0.117    0.119    0.758    0.771
 multiply_cannon_metrocomm1        9144 15.5    0.023    0.024    0.513    0.760
 acc_transpose_blocks_sync        27432 16.5    0.717    0.748    0.717    0.748
 mp_sum_l                          7287 12.8    0.588    0.742    0.588    0.742
 dbcsr_complete_redistribute        329 12.2    0.168    0.229    0.697    0.733
 make_images_sizes                 4572 15.5    0.005    0.005    0.474    0.676
 mp_alltoall_i44                   4572 16.5    0.469    0.671    0.469    0.671
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=32.800000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=593.454545, yerr=7.831912
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             782.290944E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63723.
 MP_Allreduce        10154                    429.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.014    0.041   46.610   46.610
 qs_mol_dyn_low                       1  2.0    0.003    0.003   45.991   45.999
 qs_forces                           11  3.9    0.003    0.004   45.892   45.893
 qs_energies                         11  4.9    0.005    0.012   43.832   43.835
 scf_env_do_scf                      11  5.9    0.001    0.001   37.024   37.024
 scf_env_do_scf_inner_loop          108  6.5    0.005    0.009   28.673   28.674
 velocity_verlet                     10  3.0    0.028    0.029   25.357   25.365
 dbcsr_multiply_generic            2286 12.5    0.116    0.117   20.801   20.886
 qs_scf_new_mos                     108  7.5    0.001    0.001   18.795   18.867
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   18.794   18.867
 ot_scf_mini                        108  9.5    0.002    0.002   17.590   17.672
 multiply_cannon                   2286 13.5    0.296    0.303   16.260   17.073
 multiply_cannon_loop              2286 14.5    0.880    0.914   14.823   15.528
 ot_mini                            108 10.5    0.001    0.001   10.737   10.826
 multiply_cannon_multrec           9144 15.5    3.417    4.758    9.243    9.535
 qs_ot_get_derivative               108 11.5    0.001    0.001    8.667    8.747
 init_scf_loop                       11  6.9    0.000    0.000    8.322    8.323
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.460    7.575
 qs_ks_build_kohn_sham_matrix       119  9.3    0.020    0.034    7.460    7.575
 prepare_preconditioner              11  7.9    0.000    0.000    7.300    7.317
 make_preconditioner                 11  8.9    0.000    0.000    7.300    7.317
 dbcsr_mm_accdrv_process          12550 15.8    4.864    7.164    5.689    7.197
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.802    7.190
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.714    6.821
 cp_fm_upper_to_full                 72 14.2    3.248    4.687    3.248    4.687
 init_scf_run                        11  5.9    0.000    0.001    4.646    4.646
 scf_env_initial_rho_setup           11  6.9    0.002    0.002    4.646    4.646
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.269    4.293
 calculate_rho_elec                 119  8.7    0.118    0.121    4.269    4.292
 mp_waitall_1                     94719 16.7    3.114    4.174    3.114    4.174
 sum_up_and_integrate               119 10.3    0.001    0.001    4.012    4.017
 integrate_v_rspace                 119 11.3    0.003    0.003    4.002    4.007
 qs_ot_get_p                        119 10.4    0.002    0.003    3.760    3.870
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.420    3.836
 make_m2s                          4572 13.5    0.037    0.038    2.929    3.224
 make_images                       4572 14.5    0.352    0.383    2.808    3.103
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.081    3.087
 dbcsr_complete_redistribute        329 12.2    0.297    0.304    2.108    2.963
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.770    2.628
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.260    2.604
 apply_single                       119 13.6    0.000    0.000    2.260    2.604
 fft_wrap_pw1pw2                   1201 11.6    0.014    0.014    2.560    2.573
 calculate_dm_sparse                119  9.5    0.000    0.000    2.528    2.547
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.020    1.636    2.510
 mp_alltoall_i22                    627 13.8    1.564    2.470    1.564    2.470
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.493    2.345
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.247    2.288
 qs_ot_p2m_diag                      50 11.0    0.043    0.043    2.264    2.269
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.264    2.264
 density_rs2pw                      119  9.7    0.003    0.003    2.222    2.241
 grid_integrate_task_list           119 12.3    2.143    2.173    2.143    2.173
 acc_transpose_blocks              9144 15.5    0.044    0.044    2.088    2.147
 fft_wrap_pw1pw2_140                487 12.2    0.179    0.180    2.130    2.146
 ot_diis_step                       108 11.5    0.014    0.014    2.046    2.046
 fft3d_ps                          1201 13.6    0.860    0.880    2.012    2.022
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.904    1.958
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.881    1.881
 hybrid_alltoall_any               4725 16.4    0.090    0.151    1.368    1.858
 make_images_data                  4572 15.5    0.045    0.048    1.367    1.829
 qs_energies_init_hamiltonians       11  5.9    0.022    0.023    1.827    1.828
 mp_sum_l                          7287 12.8    1.061    1.673    1.061    1.673
 cp_fm_cholesky_invert               11 10.9    1.660    1.664    1.660    1.664
 grid_collocate_task_list           119  9.7    1.565    1.583    1.565    1.583
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.551    1.551
 cp_fm_diag_elpa_base                50 14.0    1.404    1.461    1.550    1.550
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.491    1.501
 wfi_extrapolate                     11  7.9    0.001    0.001    1.494    1.494
 potential_pw2rs                    119 12.3    0.013    0.014    1.431    1.435
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.392    1.410
 mp_alltoall_d11v                  2130 13.8    1.283    1.331    1.283    1.331
 acc_transpose_blocks_sync        27432 16.5    1.131    1.187    1.131    1.187
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.131    1.144
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.003    1.061
 jit_kernel_multiply                  5 15.4    0.796    1.057    0.796    1.057
 mp_alltoall_z22v                  1201 15.6    1.017    1.042    1.017    1.042
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.955    0.964
 transfer_rs2pw                     487 10.6    0.005    0.005    0.911    0.959
 mp_allgather_i34                  2286 14.5    0.393    0.949    0.393    0.949
 qs_create_task_list                 11  7.9    0.000    0.000    0.934    0.944
 generate_qs_task_list               11  8.9    0.367    0.386    0.934    0.944
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=46.610000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=733.545455, yerr=15.263525
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             496.922624E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66212.
 MP_Allreduce         9776                    488.
 MP_Sync                52
 MP_Alltoall          1938                1383689.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.028    0.094   87.347   87.352
 qs_mol_dyn_low                       1  2.0    0.011    0.040   86.242   86.252
 qs_forces                           11  3.9    0.003    0.004   86.000   86.004
 qs_energies                         11  4.9    0.004    0.006   83.058   83.075
 scf_env_do_scf                      11  5.9    0.000    0.001   73.014   73.016
 scf_env_do_scf_inner_loop           99  6.5    0.006    0.021   66.838   66.839
 dbcsr_multiply_generic            2055 12.4    0.106    0.108   52.121   52.389
 qs_scf_new_mos                      99  7.5    0.000    0.001   49.162   49.286
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   49.161   49.285
 ot_scf_mini                         99  9.5    0.002    0.002   46.768   46.868
 velocity_verlet                     10  3.0    0.023    0.075   44.917   44.919
 multiply_cannon                   2055 13.4    0.186    0.190   43.140   44.093
 multiply_cannon_loop              2055 14.4    1.770    1.815   42.032   42.923
 ot_mini                             99 10.5    0.001    0.001   27.213   27.331
 qs_ot_get_derivative                99 11.5    0.001    0.001   20.421   20.553
 multiply_cannon_multrec          49320 15.4   11.312   12.222   17.378   18.447
 rebuild_ks_matrix                  110  8.3    0.000    0.000   14.920   15.066
 qs_ks_build_kohn_sham_matrix       110  9.3    0.017    0.068   14.920   15.066
 qs_ks_update_qs_env                110  7.6    0.001    0.001   13.087   13.219
 mp_waitall_1                    220248 16.4   11.591   12.403   11.591   12.403
 qs_ot_get_p                        110 10.4    0.001    0.001   11.037   11.211
 multiply_cannon_sync_h2d         49320 15.4    9.535   10.137    9.535   10.137
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    7.958    8.503
 multiply_cannon_metrocomm3       49320 15.4    0.082    0.086    6.699    7.906
 init_scf_run                        11  5.9    0.000    0.001    7.733    7.734
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    7.733    7.734
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.263    7.722
 apply_single                       110 13.6    0.000    0.000    7.263    7.722
 qs_ot_p2m_diag                      48 11.0    0.012    0.019    7.517    7.542
 sum_up_and_integrate               110 10.3    0.002    0.003    7.280    7.316
 integrate_v_rspace                 110 11.3    0.003    0.003    7.255    7.296
 qs_rho_update_rho_low              110  7.6    0.000    0.001    6.639    6.772
 calculate_rho_elec                 110  8.6    0.020    0.024    6.638    6.771
 cp_dbcsr_syevd                      48 12.0    0.002    0.003    6.635    6.635
 ot_diis_step                        99 11.5    0.006    0.006    6.593    6.594
 dbcsr_mm_accdrv_process          87628 16.1    3.054    3.181    5.936    6.221
 init_scf_loop                       11  6.9    0.000    0.000    6.144    6.145
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    5.941    5.962
 cp_fm_diag_elpa_base                48 14.0    5.923    5.946    5.939    5.960
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    5.455    5.541
 mp_sum_l                          6594 12.7    4.370    5.269    4.370    5.269
 make_m2s                          4110 13.4    0.061    0.065    4.339    4.442
 make_images                       4110 14.4    0.178    0.190    4.241    4.349
 wfi_extrapolate                     11  7.9    0.001    0.001    4.145    4.145
 calculate_dm_sparse                110  9.5    0.001    0.001    3.938    4.031
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.004    4.012    4.016
 prepare_preconditioner              11  7.9    0.000    0.000    3.895    3.914
 make_preconditioner                 11  8.9    0.000    0.000    3.895    3.914
 density_rs2pw                      110  9.6    0.003    0.004    3.672    3.822
 multiply_cannon_metrocomm1       49320 15.4    0.062    0.064    2.630    3.764
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.684    3.728
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.555    3.601
 calculate_first_density_matrix       1  7.0    0.000    0.001    3.483    3.488
 grid_integrate_task_list           110 12.3    3.258    3.470    3.258    3.470
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    3.239    3.292
 fft_wrap_pw1pw2                   1111 11.6    0.014    0.017    2.986    3.097
 potential_pw2rs                    110 12.3    0.006    0.006    2.905    2.936
 jit_kernel_multiply                 13 15.9    2.603    2.878    2.603    2.878
 acc_transpose_blocks             49320 15.4    0.202    0.212    2.715    2.876
 fft3d_ps                          1111 13.6    0.731    0.821    2.529    2.625
 fft_wrap_pw1pw2_140                451 12.1    0.092    0.101    2.417    2.534
 mp_alltoall_d11v                  2046 13.8    2.098    2.506    2.098    2.506
 transfer_rs2pw                     451 10.6    0.006    0.006    2.283    2.443
 grid_collocate_task_list           110  9.6    2.160    2.264    2.160    2.264
 cp_fm_cholesky_invert               11 10.9    2.125    2.130    2.125    2.130
 transfer_pw2rs                     451 13.1    0.006    0.006    2.077    2.117
 mp_waitany                       14300 13.8    1.871    2.078    1.871    2.078
 make_images_data                  4110 15.4    0.043    0.047    1.879    2.039
 mp_sum_d                          3893 11.9    1.402    2.011    1.402    2.011
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.973    1.987
 hybrid_alltoall_any               4261 16.3    0.084    0.481    1.615    1.864
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.831    1.856
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=87.352000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=472.090909, yerr=2.314168
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             586.706944E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66430.
 MP_Allreduce         9775                    566.
 MP_Sync                52
 MP_Alltoall          1717                2933536.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.045    0.078   74.213   74.214
 qs_mol_dyn_low                       1  2.0    0.011    0.071   73.682   73.693
 qs_forces                           11  3.9    0.008    0.024   73.408   73.422
 qs_energies                         11  4.9    0.002    0.003   70.003   70.025
 scf_env_do_scf                      11  5.9    0.000    0.001   60.840   60.843
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   52.062   52.063
 dbcsr_multiply_generic            2055 12.4    0.119    0.132   39.227   39.543
 velocity_verlet                     10  3.0    0.028    0.207   38.553   38.561
 qs_scf_new_mos                      99  7.5    0.001    0.001   35.642   35.779
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   35.641   35.779
 ot_scf_mini                         99  9.5    0.003    0.003   33.916   34.050
 multiply_cannon                   2055 13.4    0.226    0.245   31.863   32.783
 multiply_cannon_loop              2055 14.4    1.160    1.184   30.363   31.592
 ot_mini                             99 10.5    0.001    0.001   19.345   19.485
 multiply_cannon_multrec          24660 15.4    6.956    8.664   14.004   15.885
 rebuild_ks_matrix                  110  8.3    0.000    0.000   13.963   14.098
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   13.963   14.097
 qs_ot_get_derivative                99 11.5    0.001    0.001   13.504   13.635
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.295   12.415
 mp_waitall_1                    176588 16.5    8.207   11.451    8.207   11.451
 init_scf_loop                       11  6.9    0.000    0.000    8.731    8.732
 multiply_cannon_metrocomm3       24660 15.4    0.072    0.074    5.409    8.327
 qs_ot_get_p                        110 10.4    0.001    0.002    7.759    7.926
 multiply_cannon_sync_h2d         24660 15.4    6.363    7.448    6.363    7.448
 dbcsr_mm_accdrv_process          52282 16.1    5.522    6.400    6.881    7.226
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.462    7.063
 apply_single                       110 13.6    0.000    0.001    6.462    7.063
 sum_up_and_integrate               110 10.3    0.001    0.003    6.647    6.658
 prepare_preconditioner              11  7.9    0.000    0.000    6.625    6.650
 make_preconditioner                 11  8.9    0.000    0.000    6.625    6.650
 integrate_v_rspace                 110 11.3    0.002    0.003    6.620    6.632
 init_scf_run                        11  5.9    0.000    0.001    6.593    6.593
 scf_env_initial_rho_setup           11  6.9    0.001    0.003    6.592    6.593
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.190    6.358
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.130    6.142
 calculate_rho_elec                 110  8.6    0.039    0.047    6.129    6.141
 ot_diis_step                        99 11.5    0.010    0.011    5.766    5.766
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    5.001    5.728
 qs_ot_p2m_diag                      48 11.0    0.029    0.044    5.552    5.578
 make_m2s                          4110 13.4    0.057    0.059    4.594    5.120
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    5.022    5.023
 make_images                       4110 14.4    0.406    0.463    4.480    5.002
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    4.291    4.307
 cp_fm_diag_elpa_base                48 14.0    4.237    4.252    4.288    4.304
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.654    3.723
 wfi_extrapolate                     11  7.9    0.001    0.001    3.678    3.678
 density_rs2pw                      110  9.6    0.004    0.004    3.414    3.596
 fft_wrap_pw1pw2                   1111 11.6    0.014    0.016    3.322    3.481
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.478    3.481
 grid_integrate_task_list           110 12.3    3.131    3.360    3.131    3.360
 cp_fm_cholesky_invert               11 10.9    3.254    3.263    3.254    3.263
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.046    3.095
 calculate_dm_sparse                110  9.5    0.001    0.001    3.042    3.068
 mp_sum_l                          6594 12.7    2.177    2.966    2.177    2.966
 make_images_data                  4110 15.4    0.048    0.052    2.384    2.911
 hybrid_alltoall_any               4261 16.3    0.105    0.452    2.066    2.860
 fft3d_ps                          1111 13.6    1.072    1.281    2.703    2.855
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.813    2.816
 fft_wrap_pw1pw2_140                451 12.1    0.112    0.120    2.566    2.724
 potential_pw2rs                    110 12.3    0.008    0.008    2.548    2.572
 grid_collocate_task_list           110  9.6    2.166    2.327    2.166    2.327
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    2.117    2.142
 mp_alltoall_d11v                  2046 13.8    1.817    2.126    1.817    2.126
 transfer_rs2pw                     451 10.6    0.007    0.007    1.886    2.107
 acc_transpose_blocks             24660 15.4    0.112    0.117    2.053    2.092
 qs_energies_init_hamiltonians       11  5.9    0.003    0.016    2.014    2.034
 jit_kernel_multiply                 10 16.3    0.992    1.907    0.992    1.907
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.833    1.850
 mp_allgather_i34                  2055 14.4    0.806    1.806    0.806    1.806
 cp_fm_cholesky_decompose            22 10.9    1.723    1.731    1.723    1.731
 multiply_cannon_metrocomm4       22605 15.4    0.078    0.083    0.766    1.623
 dbcsr_complete_redistribute        325 12.2    0.242    0.350    1.327    1.613
 transfer_pw2rs                     451 13.1    0.005    0.006    1.566    1.592
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.562    1.579
 mp_waitany                       10164 13.8    1.291    1.536    1.291    1.536
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.003    1.388    1.490
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=74.214000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=555.363636, yerr=6.168435
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             665.554944E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66421.
 MP_Allreduce         9774                    562.
 MP_Sync                52
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.128    0.159   68.432   68.435
 qs_mol_dyn_low                       1  2.0    0.021    0.144   67.566   67.575
 qs_forces                           11  3.9    0.019    0.026   67.126   67.128
 qs_energies                         11  4.9    0.028    0.040   63.810   63.829
 scf_env_do_scf                      11  5.9    0.001    0.002   54.624   54.625
 scf_env_do_scf_inner_loop           99  6.5    0.007    0.013   44.693   44.695
 velocity_verlet                     10  3.0    0.014    0.105   35.464   35.492
 dbcsr_multiply_generic            2055 12.4    0.115    0.118   30.911   31.360
 qs_scf_new_mos                      99  7.5    0.001    0.001   29.673   29.769
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   29.672   29.768
 ot_scf_mini                         99  9.5    0.002    0.003   28.349   28.460
 multiply_cannon                   2055 13.4    0.212    0.222   23.580   24.977
 multiply_cannon_loop              2055 14.4    0.813    0.848   22.137   24.011
 ot_mini                             99 10.5    0.001    0.001   15.166   15.284
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.509   12.680
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   12.508   12.680
 mp_waitall_1                    139946 16.5    7.741   12.229    7.741   12.229
 multiply_cannon_multrec          16440 15.4    3.769    5.346    9.993   11.512
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.020   11.172
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.631   10.744
 init_scf_loop                       11  6.9    0.000    0.001    9.882    9.883
 multiply_cannon_metrocomm3       16440 15.4    0.045    0.046    4.530    8.862
 qs_ot_get_p                        110 10.4    0.001    0.001    8.120    8.269
 prepare_preconditioner              11  7.9    0.000    0.000    8.058    8.074
 make_preconditioner                 11  8.9    0.000    0.001    8.058    8.074
 make_full_inverse_cholesky          11  9.9    0.000    0.000    7.330    7.690
 sum_up_and_integrate               110 10.3    0.001    0.002    6.575    6.590
 integrate_v_rspace                 110 11.3    0.003    0.003    6.549    6.564
 init_scf_run                        11  5.9    0.000    0.001    6.539    6.539
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    6.539    6.539
 dbcsr_mm_accdrv_process          34862 16.1    5.364    5.702    6.072    6.285
 qs_ot_p2m_diag                      48 11.0    0.042    0.044    6.042    6.063
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.862    5.873
 calculate_rho_elec                 110  8.6    0.058    0.058    5.862    5.872
 cp_dbcsr_syevd                      48 12.0    0.014    0.091    5.606    5.607
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.986    5.556
 apply_single                       110 13.6    0.000    0.000    4.986    5.556
 make_m2s                          4110 13.4    0.049    0.050    4.587    5.083
 make_images                       4110 14.4    0.396    0.516    4.469    4.966
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    4.775    4.788
 cp_fm_diag_elpa_base                48 14.0    4.672    4.709    4.747    4.756
 ot_diis_step                        99 11.5    0.011    0.011    4.468    4.468
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.622    4.270
 multiply_cannon_sync_h2d         16440 15.4    3.270    3.920    3.270    3.920
 cp_fm_cholesky_invert               11 10.9    3.447    3.457    3.447    3.457
 grid_integrate_task_list           110 12.3    3.176    3.432    3.176    3.432
 density_rs2pw                      110  9.6    0.004    0.007    3.119    3.335
 calculate_first_density_matrix       1  7.0    0.001    0.010    3.249    3.252
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.227    3.231
 wfi_extrapolate                     11  7.9    0.001    0.001    3.184    3.184
 fft_wrap_pw1pw2                   1111 11.6    0.013    0.017    3.129    3.148
 make_images_data                  4110 15.4    0.045    0.050    2.436    3.068
 mp_sum_l                          6594 12.7    2.158    3.023    2.158    3.023
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.955    3.014
 hybrid_alltoall_any               4261 16.3    0.108    0.379    2.140    2.935
 calculate_dm_sparse                110  9.5    0.001    0.001    2.859    2.896
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.782    2.842
 fft_wrap_pw1pw2_140                451 12.1    0.127    0.133    2.527    2.549
 fft3d_ps                          1111 13.6    1.085    1.115    2.445    2.461
 potential_pw2rs                    110 12.3    0.010    0.016    2.432    2.454
 grid_collocate_task_list           110  9.6    2.216    2.453    2.216    2.453
 multiply_cannon_metrocomm4       14385 15.4    0.048    0.053    0.864    2.446
 mp_irecv_dv                      48980 15.7    0.789    2.309    0.789    2.309
 mp_alltoall_d11v                  2046 13.8    1.968    2.221    1.968    2.221
 dbcsr_complete_redistribute        325 12.2    0.334    0.366    1.717    2.195
 qs_energies_init_hamiltonians       11  5.9    0.005    0.021    2.023    2.041
 cp_fm_cholesky_decompose            22 10.9    2.018    2.040    2.018    2.040
 acc_transpose_blocks             16440 15.4    0.075    0.077    1.837    2.026
 transfer_rs2pw                     451 10.6    0.005    0.006    1.636    1.823
 cp_fm_upper_to_full                 70 14.2    1.415    1.792    1.415    1.792
 mp_allgather_i34                  2055 14.4    0.755    1.777    0.755    1.777
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.734    1.748
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.690    1.709
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    1.159    1.624
 transfer_pw2rs                     451 13.1    0.005    0.005    1.544    1.563
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.008    1.373    1.515
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    1.481    1.496
 mp_waitany                       17072 13.8    1.233    1.437    1.233    1.437
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=68.435000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=629.727273, yerr=9.026133
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             735.592448E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66419.
 MP_Allreduce         9774                    603.
 MP_Sync                52
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.022    0.058   72.311   72.312
 qs_mol_dyn_low                       1  2.0    0.003    0.003   71.698   71.723
 qs_forces                           11  3.9    0.004    0.005   71.617   71.618
 qs_energies                         11  4.9    0.001    0.002   68.181   68.186
 scf_env_do_scf                      11  5.9    0.000    0.001   57.936   57.938
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   45.187   45.188
 velocity_verlet                     10  3.0    0.015    0.017   38.833   38.836
 dbcsr_multiply_generic            2055 12.4    0.141    0.145   33.084   33.304
 qs_scf_new_mos                      99  7.5    0.001    0.001   30.347   30.446
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   30.347   30.446
 ot_scf_mini                         99  9.5    0.002    0.003   28.629   28.738
 multiply_cannon                   2055 13.4    0.236    0.259   25.328   26.666
 multiply_cannon_loop              2055 14.4    1.388    1.479   23.745   24.383
 ot_mini                             99 10.5    0.001    0.001   15.984   16.106
 multiply_cannon_multrec          24660 15.4    4.112    6.920   14.113   15.549
 init_scf_loop                       11  6.9    0.000    0.000   12.703   12.704
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.219   12.321
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   12.218   12.321
 qs_ot_get_derivative                99 11.5    0.001    0.001   11.770   11.881
 prepare_preconditioner              11  7.9    0.000    0.000   10.912   10.931
 make_preconditioner                 11  8.9    0.000    0.000   10.912   10.931
 dbcsr_mm_accdrv_process          52304 16.0    8.444   10.248    9.845   10.915
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.805   10.894
 make_full_inverse_cholesky          11  9.9    0.000    0.000    9.114   10.598
 qs_ot_get_p                        110 10.4    0.001    0.002    7.396    7.546
 init_scf_run                        11  5.9    0.000    0.001    7.444    7.445
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    7.444    7.445
 mp_waitall_1                    121746 16.5    5.005    6.974    5.005    6.974
 sum_up_and_integrate               110 10.3    0.001    0.002    6.373    6.387
 integrate_v_rspace                 110 11.3    0.003    0.003    6.347    6.360
 make_m2s                          4110 13.4    0.060    0.061    5.842    6.162
 make_images                       4110 14.4    0.572    0.694    5.699    6.017
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.855    5.928
 calculate_rho_elec                 110  8.6    0.077    0.081    5.855    5.928
 qs_ot_p2m_diag                      48 11.0    0.055    0.064    5.274    5.292
 cp_fm_upper_to_full                 70 14.2    3.395    4.928    3.395    4.928
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    4.674    4.675
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.169    4.298
 apply_single                       110 13.6    0.000    0.000    4.169    4.298
 calculate_first_density_matrix       1  7.0    0.000    0.000    4.234    4.241
 ot_diis_step                        99 11.5    0.011    0.012    4.167    4.167
 dbcsr_complete_redistribute        325 12.2    0.437    0.582    2.922    4.070
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.915    4.060
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.950    3.957
 cp_fm_diag_elpa_base                48 14.0    3.783    3.849    3.947    3.955
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.751    3.753
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.576    3.636
 multiply_cannon_metrocomm3       24660 15.4    0.038    0.039    1.661    3.494
 grid_integrate_task_list           110 12.3    3.244    3.488    3.244    3.488
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    2.334    3.474
 cp_fm_cholesky_invert               11 10.9    3.400    3.411    3.400    3.411
 calculate_dm_sparse                110  9.5    0.001    0.001    3.355    3.384
 fft_wrap_pw1pw2                   1111 11.6    0.013    0.016    3.232    3.271
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.229    3.271
 make_images_data                  4110 15.4    0.048    0.052    2.900    3.259
 hybrid_alltoall_any               4261 16.3    0.122    0.460    2.397    3.242
 density_rs2pw                      110  9.6    0.004    0.004    2.999    3.241
 wfi_extrapolate                     11  7.9    0.001    0.001    3.108    3.108
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.789    2.909
 mp_alltoall_i22                    605 13.7    1.696    2.881    1.696    2.881
 acc_transpose_blocks             24660 15.4    0.106    0.108    2.578    2.742
 fft_wrap_pw1pw2_140                451 12.1    0.146    0.148    2.657    2.701
 multiply_cannon_sync_h2d         24660 15.4    2.390    2.553    2.390    2.553
 fft3d_ps                          1111 13.6    1.123    1.167    2.502    2.531
 grid_collocate_task_list           110  9.6    2.262    2.489    2.262    2.489
 qs_energies_init_hamiltonians       11  5.9    0.005    0.015    2.271    2.274
 potential_pw2rs                    110 12.3    0.012    0.013    2.214    2.227
 mp_alltoall_d11v                  2046 13.8    1.835    2.183    1.835    2.183
 jit_kernel_multiply                  8 15.7    1.056    2.151    1.056    2.151
 cp_fm_cholesky_decompose            22 10.9    1.927    1.966    1.927    1.966
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.881    1.913
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.890    1.911
 mp_sum_l                          6594 12.7    1.220    1.802    1.220    1.802
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.614    1.710
 mp_allgather_i34                  2055 14.4    0.668    1.705    0.668    1.705
 transfer_rs2pw                     451 10.6    0.005    0.006    1.433    1.700
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.657    1.668
 multiply_cannon_metrocomm4       20550 15.4    0.059    0.062    0.850    1.621
 mp_irecv_dv                      62702 16.1    0.746    1.536    0.746    1.536
 acc_transpose_blocks_sync        73980 16.4    1.367    1.506    1.367    1.506
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=72.312000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=698.272727, yerr=7.398905
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             870.273024E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66417.
 MP_Allreduce         9774                    644.
 MP_Sync                52
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.033    0.061   57.582   57.583
 qs_mol_dyn_low                       1  2.0    0.003    0.003   56.860   56.879
 qs_forces                           11  3.9    0.033    0.059   56.626   56.628
 qs_energies                         11  4.9    0.045    0.048   52.959   52.976
 scf_env_do_scf                      11  5.9    0.000    0.001   43.516   43.517
 scf_env_do_scf_inner_loop           99  6.5    0.018    0.030   35.646   35.647
 velocity_verlet                     10  3.0    0.019    0.020   31.350   31.362
 dbcsr_multiply_generic            2055 12.4    0.132    0.135   24.452   24.601
 qs_scf_new_mos                      99  7.5    0.001    0.001   21.313   21.394
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   21.312   21.393
 ot_scf_mini                         99  9.5    0.002    0.002   20.045   20.119
 multiply_cannon                   2055 13.4    0.238    0.246   18.720   19.990
 multiply_cannon_loop              2055 14.4    0.603    0.622   17.437   17.707
 rebuild_ks_matrix                  110  8.3    0.000    0.000   11.592   11.664
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.015   11.592   11.664
 ot_mini                             99 10.5    0.001    0.001   11.133   11.199
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.315   10.381
 multiply_cannon_multrec           8220 15.4    3.322    4.660    8.294    9.524
 init_scf_loop                       11  6.9    0.000    0.000    7.822    7.823
 mp_waitall_1                    103326 16.6    6.071    7.816    6.071    7.816
 qs_ot_get_derivative                99 11.5    0.001    0.001    7.369    7.442
 init_scf_run                        11  5.9    0.000    0.001    6.236    6.236
 scf_env_initial_rho_setup           11  6.9    0.010    0.015    6.235    6.235
 prepare_preconditioner              11  7.9    0.000    0.000    6.137    6.147
 make_preconditioner                 11  8.9    0.000    0.000    6.137    6.147
 sum_up_and_integrate               110 10.3    0.001    0.002    6.103    6.116
 integrate_v_rspace                 110 11.3    0.003    0.003    6.076    6.089
 dbcsr_mm_accdrv_process          17442 15.9    3.400    4.515    4.830    5.995
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.706    5.778
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.738    5.751
 calculate_rho_elec                 110  8.6    0.115    0.116    5.737    5.751
 qs_ot_get_p                        110 10.4    0.013    0.025    4.936    5.008
 make_m2s                          4110 13.4    0.037    0.038    4.307    4.556
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.019    3.090    4.546
 make_images                       4110 14.4    0.646    0.707    4.176    4.422
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.791    3.841
 apply_single                       110 13.6    0.000    0.000    3.791    3.841
 ot_diis_step                        99 11.5    0.012    0.013    3.741    3.742
 grid_integrate_task_list           110 12.3    3.379    3.500    3.379    3.500
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.484    3.488
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.434    3.437
 fft_wrap_pw1pw2                   1111 11.6    0.014    0.015    3.377    3.383
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.135    3.136
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.962    2.965
 cp_fm_cholesky_invert               11 10.9    2.943    2.947    2.943    2.947
 fft_wrap_pw1pw2_140                451 12.1    0.192    0.195    2.901    2.910
 density_rs2pw                      110  9.6    0.003    0.004    2.727    2.839
 calculate_dm_sparse                110  9.5    0.001    0.001    2.790    2.831
 qs_energies_init_hamiltonians       11  5.9    0.003    0.006    2.692    2.707
 wfi_extrapolate                     11  7.9    0.001    0.001    2.697    2.698
 hybrid_alltoall_any               4261 16.3    0.201    0.841    2.304    2.690
 make_images_data                  4110 15.4    0.041    0.047    2.299    2.626
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.549    2.576
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.545    2.553
 cp_fm_diag_elpa_base                48 14.0    2.489    2.515    2.543    2.551
 fft3d_ps                          1111 13.6    1.274    1.285    2.534    2.542
 multiply_cannon_sync_h2d          8220 15.4    2.349    2.504    2.349    2.504
 grid_collocate_task_list           110  9.6    2.364    2.443    2.364    2.443
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.117    2.166
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.002    2.033
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.791    1.994
 mp_alltoall_d11v                  2046 13.8    1.773    1.991    1.773    1.991
 potential_pw2rs                    110 12.3    0.015    0.015    1.871    1.879
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.727    1.740
 cp_fm_cholesky_decompose            22 10.9    1.713    1.728    1.713    1.728
 jit_kernel_multiply                  7 15.6    1.115    1.645    1.115    1.645
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.511    1.639
 acc_transpose_blocks              8220 15.4    0.038    0.040    1.561    1.623
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.606    1.613
 dbcsr_complete_redistribute        325 12.2    0.591    0.628    1.525    1.613
 mp_allgather_i34                  2055 14.4    0.452    1.557    0.452    1.557
 multiply_cannon_metrocomm4        6165 15.4    0.018    0.020    0.477    1.392
 mp_irecv_dv                      24056 15.7    0.450    1.345    0.450    1.345
 qs_create_task_list                 11  7.9    0.000    0.000    1.219    1.317
 generate_qs_task_list               11  8.9    0.376    0.445    1.218    1.316
 transfer_rs2pw                     451 10.6    0.005    0.005    1.150    1.292
 mp_waitany                        9240 13.8    1.085    1.247    1.085    1.247
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.217    1.240
 multiply_cannon_metrocomm1        8220 15.4    0.022    0.022    0.850    1.206
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=57.583000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=820.000000, yerr=15.521246
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.409675E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67098.
 MP_Allreduce         9752                    812.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.019    0.039   95.940   95.940
 qs_mol_dyn_low                       1  2.0    0.003    0.003   95.300   95.311
 qs_forces                           11  3.9    0.025    0.046   94.991   94.992
 qs_energies                         11  4.9    0.021    0.023   90.790   90.803
 scf_env_do_scf                      11  5.9    0.000    0.001   79.297   79.297
 velocity_verlet                     10  3.0    0.008    0.008   59.364   59.399
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   48.382   48.385
 dbcsr_multiply_generic            2055 12.4    0.124    0.128   32.166   32.234
 init_scf_loop                       11  6.9    0.000    0.000   30.840   30.845
 qs_scf_new_mos                      99  7.5    0.001    0.001   30.248   30.266
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   30.247   30.265
 prepare_preconditioner              11  7.9    0.000    0.000   28.616   28.638
 make_preconditioner                 11  8.9    0.000    0.000   28.616   28.638
 ot_scf_mini                         99  9.5    0.002    0.002   28.361   28.377
 make_full_inverse_cholesky          11  9.9    0.000    0.000   22.457   28.048
 multiply_cannon                   2055 13.4    0.330    0.349   24.269   25.374
 multiply_cannon_loop              2055 14.4    0.825    0.846   22.294   22.987
 cp_fm_upper_to_full                 70 14.2   13.086   18.935   13.086   18.935
 ot_mini                             99 10.5    0.001    0.001   15.666   15.692
 rebuild_ks_matrix                  110  8.3    0.000    0.000   14.224   14.241
 qs_ks_build_kohn_sham_matrix       110  9.3    0.040    0.051   14.224   14.240
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.915   12.934
 dbcsr_complete_redistribute        325 12.2    1.030    1.048    7.876   11.338
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.897   10.910
 multiply_cannon_multrec           8220 15.4    4.251    4.415   10.396   10.599
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.804   10.268
 mp_waitall_1                     84994 16.7    8.816    9.705    8.816    9.705
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    6.144    9.566
 mp_alltoall_i22                    605 13.7    5.770    9.238    5.770    9.238
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.462    7.502
 calculate_rho_elec                 110  8.6    0.227    0.227    7.461    7.501
 qs_ot_get_p                        110 10.4    0.004    0.004    7.229    7.250
 sum_up_and_integrate               110 10.3    0.002    0.002    7.169    7.184
 integrate_v_rspace                 110 11.3    0.003    0.003    7.141    7.156
 init_scf_run                        11  5.9    0.000    0.001    7.038    7.038
 scf_env_initial_rho_setup           11  6.9    0.005    0.005    7.038    7.038
 cp_fm_cholesky_invert               11 10.9    6.377    6.380    6.377    6.380
 dbcsr_mm_accdrv_process          11614 15.7    4.123    4.350    5.995    6.267
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.019    5.646    6.241
 make_m2s                          4110 13.4    0.043    0.044    5.793    6.225
 make_images                       4110 14.4    0.880    0.934    5.602    6.035
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.913    5.335
 apply_single                       110 13.6    0.000    0.000    4.913    5.335
 qs_ot_p2m_diag                      48 11.0    0.151    0.156    5.253    5.257
 fft_wrap_pw1pw2                   1111 11.6    0.016    0.016    4.974    4.988
 ot_diis_step                        99 11.5    0.015    0.015    4.723    4.723
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    4.684    4.685
 fft_wrap_pw1pw2_140                451 12.1    0.364    0.365    4.252    4.270
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.959    3.959
 cp_fm_diag_elpa_base                48 14.0    3.396    3.605    3.956    3.956
 density_rs2pw                      110  9.6    0.004    0.004    3.846    3.870
 fft3d_ps                          1111 13.6    1.841    1.873    3.833    3.849
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.340    3.836
 grid_integrate_task_list           110 12.3    3.695    3.776    3.695    3.776
 qs_energies_init_hamiltonians       11  5.9    0.022    0.045    3.754    3.766
 hybrid_alltoall_any               4261 16.3    0.262    0.561    3.027    3.698
 make_images_data                  4110 15.4    0.045    0.048    3.004    3.651
 calculate_dm_sparse                110  9.5    0.001    0.001    3.541    3.582
 wfi_extrapolate                     11  7.9    0.001    0.001    3.521    3.522
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.391    3.393
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.347    3.366
 multiply_cannon_sync_h2d          8220 15.4    3.127    3.165    3.127    3.165
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.056    3.064
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.775    2.785
 grid_collocate_task_list           110  9.6    2.682    2.717    2.682    2.717
 potential_pw2rs                    110 12.3    0.021    0.021    2.589    2.597
 cp_fm_cholesky_decompose            22 10.9    2.496    2.521    2.496    2.521
 mp_alltoall_d11v                  2046 13.8    2.311    2.383    2.311    2.383
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    2.220    2.283
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.100    2.190
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.967    1.985
 acc_transpose_blocks              8220 15.4    0.040    0.040    1.917    1.973
 qs_create_task_list                 11  7.9    0.000    0.000    1.899    1.943
 generate_qs_task_list               11  8.9    0.730    0.781    1.899    1.942
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.916    1.919
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=95.940000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1273.909091, yerr=58.232222
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             629.325824E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175955383376
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4002                  57761.
 MP_Allreduce        11084                    796.
 MP_Sync                87
 MP_Alltoall          2226                2488523.
 MP_SendRecv         24320                  18752.
 MP_ISendRecv        24320                  18752.
 MP_Wait             42476
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.111    0.247  220.202  220.212
 qs_mol_dyn_low                       1  2.0    0.057    0.158  218.771  218.783
 qs_forces                           11  3.9    0.006    0.008  218.579  218.639
 qs_energies                         11  4.9    0.017    0.104  212.859  212.929
 scf_env_do_scf                      11  5.9    0.004    0.026  194.993  194.997
 scf_env_do_scf_inner_loop          117  6.6    0.032    0.179  173.119  173.123
 qs_scf_new_mos                     117  7.6    0.001    0.001  132.020  132.320
 qs_scf_loop_do_ot                  117  8.6    0.002    0.007  132.020  132.319
 dbcsr_multiply_generic            2507 12.6    0.189    0.209  128.576  129.641
 velocity_verlet                     10  3.0    0.022    0.058  128.498  128.510
 ot_scf_mini                        117  9.6    0.003    0.005  125.289  125.596
 multiply_cannon                   2507 13.6    0.239    0.247  103.002  104.693
 multiply_cannon_loop              2507 14.6    2.402    2.460  100.644  102.325
 ot_mini                            117 10.6    0.001    0.001   68.603   68.918
 qs_ot_get_derivative               117 11.6    0.001    0.001   43.481   43.767
 multiply_cannon_multrec          60168 15.6   31.798   33.774   41.838   43.644
 rebuild_ks_matrix                  128  8.3    0.001    0.001   34.666   35.008
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.017   34.666   35.007
 qs_ot_get_p                        128 10.4    0.001    0.001   34.132   34.471
 mp_waitall_1                    267128 16.5   30.827   34.118   30.827   34.118
 qs_ks_update_qs_env                128  7.6    0.001    0.001   31.229   31.533
 multiply_cannon_sync_h2d         60168 15.6   26.284   28.056   26.284   28.056
 qs_ot_p2m_diag                      83 11.4    0.079    0.091   27.083   27.133
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.522   26.069
 apply_single                       128 13.6    0.001    0.001   24.521   26.068
 ot_diis_step                       117 11.6    0.008    0.008   24.903   24.904
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   24.116   24.118
 init_scf_loop                       11  6.9    0.024    0.187   21.789   21.791
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   21.236   21.435
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   20.635   20.668
 cp_fm_diag_elpa_base                83 14.4   20.537   20.597   20.629   20.664
 multiply_cannon_metrocomm3       60168 15.6    0.117    0.122   16.697   18.985
 prepare_preconditioner              11  7.9    0.000    0.000   16.914   16.945
 make_preconditioner                 11  8.9    0.000    0.002   16.914   16.945
 make_full_inverse_cholesky          11  9.9    0.000    0.000   16.128   16.333
 make_m2s                          5014 13.6    0.105    0.114   14.526   14.908
 make_images                       5014 14.6    0.399    0.418   14.341   14.732
 sum_up_and_integrate               128 10.3    0.002    0.004   14.636   14.652
 integrate_v_rspace                 128 11.3    0.003    0.004   14.578   14.596
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.916   14.097
 calculate_rho_elec                 128  8.7    0.045    0.065   13.915   14.096
 init_scf_run                        11  5.9    0.000    0.002   13.279   13.283
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   13.279   13.283
 mp_sum_l                          7950 12.9    9.957   11.137    9.957   11.137
 dbcsr_mm_accdrv_process         124484 16.2    4.772    4.924    9.603   10.199
 cp_fm_cholesky_invert               11 10.9    9.697    9.706    9.697    9.706
 wfi_extrapolate                     11  7.9    0.001    0.001    9.345    9.345
 calculate_dm_sparse                128  9.5    0.001    0.001    8.700    8.775
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    8.404    8.572
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    8.279    8.354
 multiply_cannon_metrocomm1       60168 15.6    0.090    0.096    6.705    8.268
 density_rs2pw                      128  9.7    0.005    0.006    7.357    8.077
 make_images_data                  5014 15.6    0.065    0.072    7.055    8.012
 grid_integrate_task_list           128 12.3    7.045    7.610    7.045    7.610
 hybrid_alltoall_any               5200 16.5    0.294    2.270    6.152    7.494
 fft_wrap_pw1pw2                   1291 11.7    0.018    0.026    7.068    7.399
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.989    7.003
 fft3d_ps                          1291 13.7    2.094    2.597    5.894    6.167
 fft_wrap_pw1pw2_140                523 12.2    0.234    0.251    5.815    6.078
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.900    6.000
 mp_alltoall_d11v                  2415 14.1    4.699    5.883    4.699    5.883
 grid_collocate_task_list           128  9.7    4.836    5.164    4.836    5.164
 potential_pw2rs                    128 12.3    0.009    0.010    5.087    5.159
 cp_fm_cholesky_decompose            22 10.9    5.005    5.017    5.005    5.017
 transfer_rs2pw                     523 10.6    0.008    0.009    3.939    4.723
 mp_sum_d                          4474 12.1    3.789    4.662    3.789    4.662
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=220.212000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=594.545455, yerr=7.062882
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430456039424       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1962800054272       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986255912960       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992003932160       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613072052224       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239176077312       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239176077312       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.233020E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.200017E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806383904       0.0%      0.0%    100.0%
 number of processed stacks               6024768       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1129.7
 marketing flops                   145.651870E+12
 -------------------------------------------------------------------------------
 # multiplications                           2529
 max memory usage/rank             842.002432E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2427840
 MPI messages size (bytes):
  total size                         4.132588E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.702167E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               71532               2339635200
     32768 < size <=   131072              729952              56049532928
    131072 < size <=  4194304             1387568            1410045313024
   4194304 < size <= 16777216              155760            1473828901424
  16777216 < size                           68112            1190343475200
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4043                  57636.
 MP_Allreduce        11185                    956.
 MP_Sync                88
 MP_Alltoall          1983                5154197.
 MP_SendRecv         12126                  47072.
 MP_ISendRecv        12126                  47072.
 MP_Wait             26114
 MP_ISend            11836                 212447.
 MP_IRecv            11836                 212447.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.143    0.213  200.116  200.147
 qs_mol_dyn_low                       1  2.0    0.051    0.145  199.098  199.114
 qs_forces                           11  3.9    0.005    0.006  198.658  198.711
 qs_energies                         11  4.9    0.022    0.128  191.861  191.917
 scf_env_do_scf                      11  5.9    0.021    0.163  173.724  173.742
 scf_env_do_scf_inner_loop          118  6.6    0.047    0.300  138.155  138.171
 velocity_verlet                     10  3.0    0.103    0.312  122.890  122.904
 dbcsr_multiply_generic            2529 12.6    0.203    0.209   99.332  100.712
 qs_scf_new_mos                     118  7.6    0.001    0.001   98.589   99.298
 qs_scf_loop_do_ot                  118  8.6    0.021    0.158   98.588   99.297
 ot_scf_mini                        118  9.6    0.004    0.005   93.699   94.388
 multiply_cannon                   2529 13.6    0.511    0.566   78.273   83.196
 multiply_cannon_loop              2529 14.6    1.581    1.650   74.694   77.527
 ot_mini                            118 10.6    0.001    0.001   51.926   52.728
 mp_waitall_1                    216598 16.6   24.746   39.205   24.746   39.205
 multiply_cannon_multrec          30348 15.6   21.127   25.576   31.853   36.809
 init_scf_loop                       11  6.9    0.017    0.135   35.444   35.459
 rebuild_ks_matrix                  129  8.3    0.001    0.001   34.167   34.807
 qs_ks_build_kohn_sham_matrix       129  9.3    0.119    0.542   34.166   34.806
 qs_ks_update_qs_env                129  7.6    0.001    0.001   30.941   31.549
 qs_ot_get_derivative               118 11.6    0.001    0.002   29.696   30.408
 prepare_preconditioner              11  7.9    0.000    0.000   29.762   29.879
 make_preconditioner                 11  8.9    0.019    0.148   29.762   29.879
 make_full_inverse_cholesky          11  9.9    0.000    0.000   28.395   29.019
 multiply_cannon_metrocomm3       30348 15.6    0.096    0.101   15.896   28.692
 qs_ot_get_p                        129 10.4    0.007    0.014   23.663   24.272
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   22.322   23.620
 apply_single                       129 13.6    0.001    0.001   22.322   23.620
 ot_diis_step                       118 11.6    0.015    0.015   22.052   22.054
 multiply_cannon_sync_h2d         30348 15.6   18.131   20.372   18.131   20.372
 qs_ot_p2m_diag                      84 11.4    0.191    0.219   18.542   18.580
 cp_dbcsr_syevd                      84 12.4    0.011    0.046   17.266   17.268
 cp_fm_cholesky_invert               11 10.9   17.215   17.228   17.215   17.228
 make_m2s                          5058 13.6    0.087    0.093   14.556   16.315
 make_images                       5058 14.6    1.173    1.375   14.337   16.099
 sum_up_and_integrate               129 10.3    0.025    0.183   14.969   15.035
 integrate_v_rspace                 129 11.3    0.003    0.004   14.861   14.979
 qs_rho_update_rho_low              129  7.7    0.001    0.001   13.856   13.926
 calculate_rho_elec                 129  8.7    0.092    0.137   13.855   13.925
 cp_fm_diag_elpa                     84 13.4    0.000    0.000   13.710   13.744
 cp_fm_diag_elpa_base                84 14.4   13.428   13.542   13.700   13.729
 init_scf_run                        11  5.9    0.000    0.001   12.723   12.724
 scf_env_initial_rho_setup           11  6.9    0.002    0.006   12.723   12.724
 qs_ot_get_derivative_diag           78 12.4    0.002    0.002   11.682   12.171
 multiply_cannon_metrocomm4       27819 15.6    0.107    0.122    3.893   11.035
 dbcsr_mm_accdrv_process          62758 16.2    5.512    6.427   10.170   10.856
 mp_irecv_dv                      70084 16.3    3.681   10.613    3.681   10.613
 make_images_data                  5058 15.6    0.066    0.076    8.341   10.375
 hybrid_alltoall_any               5245 16.5    0.353    1.522    7.242    9.626
 wfi_extrapolate                     11  7.9    0.006    0.045    8.433    8.433
 fft_wrap_pw1pw2                   1301 11.7    0.019    0.027    8.120    8.294
 density_rs2pw                      129  9.7    0.009    0.037    7.261    7.802
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    7.012    7.792
 grid_integrate_task_list           129 12.3    7.235    7.740    7.235    7.740
 cp_fm_cholesky_decompose            22 10.9    7.332    7.403    7.332    7.403
 fft_wrap_pw1pw2_140                527 12.2    0.253    0.275    7.050    7.196
 mp_sum_l                          8016 12.9    4.825    6.873    4.825    6.873
 fft3d_ps                          1301 13.7    2.785    2.956    6.547    6.724
 calculate_dm_sparse                129  9.5    0.001    0.001    6.578    6.721
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.271    6.287
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    5.510    5.582
 potential_pw2rs                    129 12.3    0.016    0.031    5.366    5.478
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.272    5.458
 grid_collocate_task_list           129  9.7    5.040    5.422    5.040    5.422
 mp_alltoall_d11v                  2429 14.1    4.540    5.352    4.540    5.352
 mp_allgather_i34                  2529 14.6    2.086    4.991    2.086    4.991
 mp_sum_d                          4513 12.1    3.068    4.432    3.068    4.432
 dbcsr_complete_redistribute        397 12.7    0.792    0.906    3.362    4.240
 transfer_rs2pw                     527 10.6    0.007    0.008    3.584    4.125
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=200.147000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=801.818182, yerr=2.724241
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022121472       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444702699520       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796573E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.906045E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499744       0.0%      0.0%    100.0%
 number of processed stacks               3951168       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1697.1
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank             993.517568E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1033760
 MPI messages size (bytes):
  total size                         2.695213E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.607194E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              279168              36591108096
    131072 < size <=  4194304              654272             987691483136
   4194304 < size <= 16777216               65184             925172769472
  16777216 < size                           28448             745747251200
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4003                  58192.
 MP_Allreduce        11085                   1000.
 MP_Sync                86
 MP_Alltoall          1700                9383497.
 MP_SendRecv          7874                  75008.
 MP_ISendRecv         7874                  75008.
 MP_Wait             21654
 MP_ISend            11660                 275234.
 MP_IRecv            11660                 275234.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.044    0.127  184.482  184.487
 qs_mol_dyn_low                       1  2.0    0.044    0.119  183.632  183.645
 qs_forces                           11  3.9    0.006    0.013  183.303  183.344
 qs_energies                         11  4.9    0.054    0.418  176.329  176.664
 scf_env_do_scf                      11  5.9    0.001    0.004  159.153  159.155
 scf_env_do_scf_inner_loop          116  6.6    0.010    0.057  122.644  122.644
 velocity_verlet                     10  3.0    0.068    0.172  114.191  114.210
 qs_scf_new_mos                     116  7.6    0.001    0.001   84.879   85.252
 qs_scf_loop_do_ot                  116  8.6    0.001    0.002   84.878   85.251
 dbcsr_multiply_generic            2485 12.5    0.192    0.197   83.420   84.723
 ot_scf_mini                        116  9.6    0.004    0.007   80.650   81.113
 multiply_cannon                   2485 13.5    0.502    0.520   63.230   66.885
 multiply_cannon_loop              2485 14.5    1.117    1.178   59.918   63.090
 ot_mini                            116 10.6    0.001    0.001   43.503   43.970
 init_scf_loop                       11  6.9    0.004    0.035   36.406   36.407
 mp_waitall_1                    169034 16.6   25.164   34.759   25.164   34.759
 prepare_preconditioner              11  7.9    0.000    0.000   32.247   32.292
 make_preconditioner                 11  8.9    0.005    0.039   32.247   32.292
 rebuild_ks_matrix                  127  8.3    0.001    0.001   30.886   31.339
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.030   30.886   31.339
 make_full_inverse_cholesky          11  9.9    0.000    0.000   29.736   31.226
 qs_ks_update_qs_env                127  7.6    0.001    0.001   27.875   28.293
 multiply_cannon_multrec          19880 15.5   13.006   15.966   22.852   25.785
 multiply_cannon_metrocomm3       19880 15.5    0.059    0.062   15.162   24.617
 qs_ot_get_derivative               116 11.6    0.001    0.002   24.109   24.584
 qs_ot_get_p                        127 10.4    0.002    0.003   22.420   22.969
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   19.522   20.514
 apply_single                       127 13.6    0.001    0.001   19.522   20.513
 ot_diis_step                       116 11.6    0.018    0.018   19.288   19.288
 qs_ot_p2m_diag                      82 11.4    0.262    0.269   17.868   17.880
 cp_dbcsr_syevd                      82 12.4    0.042    0.303   16.772   16.773
 make_m2s                          4970 13.5    0.076    0.080   15.010   16.452
 make_images                       4970 14.5    1.137    1.237   14.776   16.214
 multiply_cannon_sync_h2d         19880 15.5   13.659   15.681   13.659   15.681
 cp_fm_cholesky_invert               11 10.9   15.172   15.181   15.172   15.181
 sum_up_and_integrate               127 10.3    0.002    0.003   14.166   14.191
 integrate_v_rspace                 127 11.3    0.003    0.004   14.106   14.134
 qs_rho_update_rho_low              127  7.7    0.001    0.001   13.734   13.798
 calculate_rho_elec                 127  8.7    0.131    0.145   13.733   13.797
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   13.121   13.178
 cp_fm_diag_elpa_base                82 14.4   12.446   12.620   12.873   12.894
 init_scf_run                        11  5.9    0.000    0.001   11.540   11.540
 scf_env_initial_rho_setup           11  6.9    0.003    0.008   11.540   11.540
 make_images_data                  4970 15.5    0.061    0.072    8.894   10.882
 hybrid_alltoall_any               5155 16.4    0.447    2.043    7.808   10.052
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    9.411    9.747
 dbcsr_mm_accdrv_process          41158 16.2    6.013    6.581    9.305    9.521
 multiply_cannon_metrocomm4       17395 15.5    0.066    0.077    3.453    9.403
 mp_irecv_dv                      49801 16.2    3.324    9.144    3.324    9.144
 fft_wrap_pw1pw2                   1281 11.7    0.019    0.024    7.812    7.930
 cp_fm_cholesky_decompose            22 10.9    7.757    7.857    7.757    7.857
 grid_integrate_task_list           127 12.3    7.232    7.775    7.232    7.775
 wfi_extrapolate                     11  7.9    0.001    0.001    7.576    7.576
 density_rs2pw                      127  9.7    0.006    0.010    7.046    7.488
 cp_fm_upper_to_full                104 14.8    5.746    7.392    5.746    7.392
 fft_wrap_pw1pw2_140                519 12.2    0.283    0.293    6.716    6.864
 dbcsr_complete_redistribute        393 12.7    1.179    1.263    4.961    6.699
 fft3d_ps                          1281 13.7    2.742    2.969    6.132    6.209
 calculate_dm_sparse                127  9.5    0.001    0.001    5.968    6.086
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.978    5.990
 mp_alltoall_d11v                  2401 14.1    4.609    5.754    4.609    5.754
 grid_collocate_task_list           127  9.7    5.108    5.592    5.108    5.592
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.778    5.502
 copy_fm_to_dbcsr                   208 11.6    0.002    0.002    3.757    5.494
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.948    5.082
 mp_sum_l                          7884 12.9    3.390    4.843    3.390    4.843
 mp_allgather_i34                  2485 14.5    1.822    4.793    1.822    4.793
 potential_pw2rs                    127 12.3    0.020    0.022    4.605    4.643
 qs_energies_init_hamiltonians       11  5.9    0.007    0.036    4.173    4.216
 transfer_fm_to_dbcsr                11  9.9    0.019    0.025    2.485    4.185
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    3.992    4.031
 mp_alltoall_i22                    712 14.1    2.114    3.986    2.114    3.986
 transfer_rs2pw                     519 10.6    0.007    0.007    3.445    3.966
 mp_sum_d                          4454 12.1    2.799    3.882    2.799    3.882
 calculate_first_density_matrix       1  7.0    0.003    0.025    3.750    3.755
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=184.487000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=932.454545, yerr=25.349784
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410023282688       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444707676160       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796579E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.320339E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705500928       0.0%      0.0%    100.0%
 number of processed stacks               5927808       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1131.2
 marketing flops                   143.508480E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               1.150898E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1133160
 MPI messages size (bytes):
  total size                         2.008142E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.772161E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              315952              35695099904
    131072 < size <=  4194304              709496             778939400192
   4194304 < size <= 16777216               69840             660837789680
  16777216 < size                           30480             532676608000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4003                  58189.
 MP_Allreduce        11085                   1083.
 MP_Sync                86
 MP_Alltoall          1700               12496381.
 MP_SendRecv          5842                  75008.
 MP_ISendRecv         5842                  75008.
 MP_Wait             22272
 MP_ISend            14840                 244848.
 MP_IRecv            14840                 244848.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.048    0.125  194.188  194.191
 qs_mol_dyn_low                       1  2.0    0.020    0.140  193.371  193.382
 qs_forces                           11  3.9    0.004    0.005  193.050  193.063
 qs_energies                         11  4.9    0.024    0.165  185.866  185.879
 scf_env_do_scf                      11  5.9    0.006    0.040  166.906  166.925
 velocity_verlet                     10  3.0    0.028    0.043  124.197  124.218
 scf_env_do_scf_inner_loop          116  6.6    0.024    0.167  118.697  118.698
 qs_scf_new_mos                     116  7.6    0.001    0.001   82.925   83.216
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   82.924   83.216
 dbcsr_multiply_generic            2485 12.5    0.195    0.200   80.449   81.318
 ot_scf_mini                        116  9.6    0.003    0.004   78.403   78.704
 multiply_cannon                   2485 13.5    0.546    0.575   55.654   59.268
 multiply_cannon_loop              2485 14.5    1.815    1.893   51.819   53.911
 init_scf_loop                       11  6.9    0.016    0.130   48.082   48.096
 ot_mini                            116 10.6    0.001    0.001   42.941   43.243
 prepare_preconditioner              11  7.9    0.000    0.000   42.686   42.772
 make_preconditioner                 11  8.9    0.010    0.079   42.686   42.772
 make_full_inverse_cholesky          11  9.9    0.010    0.023   36.376   41.311
 multiply_cannon_multrec          29820 15.5   13.491   18.796   26.384   31.341
 rebuild_ks_matrix                  127  8.3    0.001    0.001   30.263   30.626
 qs_ks_build_kohn_sham_matrix       127  9.3    0.148    0.682   30.262   30.626
 qs_ks_update_qs_env                127  7.6    0.001    0.001   27.466   27.799
 mp_waitall_1                    146592 16.7   16.948   26.486   16.948   26.486
 qs_ot_get_derivative               116 11.6    0.001    0.002   23.367   23.672
 qs_ot_get_p                        127 10.4    0.012    0.017   21.690   22.023
 make_m2s                          4970 13.5    0.092    0.097   20.196   21.429
 make_images                       4970 14.5    1.920    2.195   19.887   21.117
 apply_preconditioner_dbcsr         127 12.6    0.000    0.001   18.946   19.562
 apply_single                       127 13.6    0.001    0.001   18.945   19.562
 ot_diis_step                       116 11.6    0.018    0.018   19.444   19.446
 qs_ot_p2m_diag                      82 11.4    0.339    0.385   17.371   17.426
 cp_fm_upper_to_full                104 14.8   11.351   16.665   11.351   16.665
 cp_fm_cholesky_invert               11 10.9   16.358   16.367   16.358   16.367
 cp_dbcsr_syevd                      82 12.4    0.016    0.092   15.958   15.960
 multiply_cannon_metrocomm3       29820 15.5    0.049    0.052    6.186   14.759
 sum_up_and_integrate               127 10.3    0.025    0.185   14.170   14.238
 integrate_v_rspace                 127 11.3    0.003    0.004   14.088   14.180
 qs_rho_update_rho_low              127  7.7    0.001    0.001   13.679   13.753
 calculate_rho_elec                 127  8.7    0.173    0.188   13.679   13.752
 dbcsr_complete_redistribute        393 12.7    1.526    1.695    9.362   13.200
 dbcsr_mm_accdrv_process          61748 16.2    8.364    9.268   12.461   12.944
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   12.628   12.653
 make_images_data                  4970 15.5    0.064    0.070   10.768   12.637
 cp_fm_diag_elpa_base                82 14.4   11.592   11.908   12.557   12.571
 init_scf_run                        11  5.9    0.000    0.001   12.363   12.364
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.363   12.364
 multiply_cannon_sync_h2d         29820 15.5   10.477   11.854   10.477   11.854
 copy_fm_to_dbcsr                   208 11.6    0.001    0.002    7.975   11.782
 hybrid_alltoall_any               5155 16.4    0.523    2.197    9.404   11.430
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    9.742    9.963
 transfer_fm_to_dbcsr                11  9.9    0.002    0.012    6.262    9.927
 mp_alltoall_i22                    712 14.1    5.813    9.781    5.813    9.781
 fft_wrap_pw1pw2                   1281 11.7    0.019    0.024    8.262    8.377
 cp_fm_cholesky_decompose            22 10.9    7.738    7.840    7.738    7.840
 grid_integrate_task_list           127 12.3    7.462    7.808    7.462    7.808
 wfi_extrapolate                     11  7.9    0.003    0.014    7.611    7.611
 fft_wrap_pw1pw2_140                519 12.2    0.326    0.341    7.333    7.475
 density_rs2pw                      127  9.7    0.011    0.051    6.562    6.941
 multiply_cannon_metrocomm4       24850 15.5    0.078    0.088    2.788    6.918
 mp_irecv_dv                      75445 16.2    2.635    6.633    2.635    6.633
 fft3d_ps                          1281 13.7    2.930    2.965    6.446    6.560
 calculate_dm_sparse                127  9.5    0.001    0.001    6.224    6.329
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.864    5.998
 mp_alltoall_d11v                  2401 14.1    5.007    5.598    5.007    5.598
 grid_collocate_task_list           127  9.7    5.276    5.571    5.276    5.571
 qs_energies_init_hamiltonians       11  5.9    0.003    0.011    4.643    4.652
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.542    4.633
 potential_pw2rs                    127 12.3    0.024    0.038    4.537    4.606
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.432    4.535
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    4.237    4.307
 calculate_first_density_matrix       1  7.0    0.015    0.116    4.125    4.192
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=194.191000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1090.454545, yerr=15.417603
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430454546432       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1973537472512       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986255912960       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992006770688       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613065416704       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239182565376       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239182565376       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.243766E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.910792E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806547424       0.0%      0.0%    100.0%
 number of processed stacks               1978768       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3439.8
 marketing flops                   145.661668E+12
 -------------------------------------------------------------------------------
 # multiplications                           2534
 max memory usage/rank               1.554715E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  243264
 MPI messages size (bytes):
  total size                         1.342058E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.516879E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              115488              60548972544
   4194304 < size <= 16777216              105840             554906419200
  16777216 < size                           20352             726592540656
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         9025                     51.
 MP_Alltoall          9734                 793691.
 MP_ISend            40500                2096702.
 MP_IRecv            40500                2095807.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4043                  57624.
 MP_Allreduce        11184                   1163.
 MP_Sync                88
 MP_Alltoall          1724               18848021.
 MP_SendRecv          3870                 122880.
 MP_ISendRecv         3870                 122880.
 MP_Wait             16244
 MP_ISend            10760                 423501.
 MP_IRecv            10760                 423501.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.044    0.075  183.781  183.781
 qs_mol_dyn_low                       1  2.0    0.003    0.003  183.134  183.147
 qs_forces                           11  3.9    0.025    0.027  183.022  183.026
 qs_energies                         11  4.9    0.003    0.004  175.516  175.521
 scf_env_do_scf                      11  5.9    0.001    0.001  157.364  157.377
 scf_env_do_scf_inner_loop          118  6.6    0.019    0.024  118.526  118.527
 velocity_verlet                     10  3.0    0.022    0.024  118.349  118.353
 qs_scf_new_mos                     118  7.6    0.001    0.001   80.907   80.994
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   80.906   80.994
 ot_scf_mini                        118  9.6    0.003    0.004   76.405   76.474
 dbcsr_multiply_generic            2534 12.6    0.190    0.199   74.237   74.538
 multiply_cannon                   2534 13.6    0.567    0.599   54.543   58.373
 multiply_cannon_loop              2534 14.6    0.822    0.853   51.481   52.133
 ot_mini                            118 10.6    0.001    0.001   38.977   39.015
 init_scf_loop                       11  6.9    0.000    0.000   38.679   38.681
 prepare_preconditioner              11  7.9    0.000    0.000   34.760   34.783
 make_preconditioner                 11  8.9    0.000    0.000   34.760   34.783
 make_full_inverse_cholesky          11  9.9    0.016    0.028   32.541   32.859
 mp_waitall_1                    127116 16.7   25.794   31.791   25.794   31.791
 rebuild_ks_matrix                  129  8.3    0.001    0.001   29.869   29.929
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.018   29.869   29.929
 qs_ks_update_qs_env                129  7.6    0.001    0.001   27.300   27.358
 qs_ot_get_p                        129 10.4    0.025    0.026   24.473   24.545
 qs_ot_get_derivative               118 11.6    0.002    0.002   21.874   21.940
 multiply_cannon_multrec          10136 15.6   10.466   14.862   18.233   21.600
 qs_ot_p2m_diag                      84 11.4    0.502    0.508   20.271   20.289
 cp_fm_cholesky_invert               11 10.9   20.254   20.263   20.254   20.263
 multiply_cannon_metrocomm3       10136 15.6    0.025    0.026   12.579   19.251
 cp_dbcsr_syevd                      84 12.4    0.005    0.006   18.956   18.958
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   17.125   17.387
 apply_single                       129 13.6    0.001    0.001   17.124   17.386
 ot_diis_step                       118 11.6    0.020    0.022   17.017   17.017
 make_m2s                          5068 13.6    0.065    0.070   15.542   16.362
 make_images                       5068 14.6    2.170    2.610   15.230   16.052
 cp_fm_diag_elpa                     84 13.4    0.000    0.000   15.514   15.522
 cp_fm_diag_elpa_base                84 14.4   15.228   15.337   15.509   15.517
 sum_up_and_integrate               129 10.3    0.002    0.002   14.402   14.448
 integrate_v_rspace                 129 11.3    0.004    0.004   14.341   14.389
 qs_rho_update_rho_low              129  7.7    0.001    0.001   14.077   14.112
 calculate_rho_elec                 129  8.7    0.259    0.270   14.076   14.111
 multiply_cannon_sync_h2d         10136 15.6   10.916   11.283   10.916   11.283
 init_scf_run                        11  5.9    0.000    0.001   11.160   11.160
 scf_env_initial_rho_setup           11  6.9    0.002    0.003   11.160   11.160
 make_images_data                  5068 15.6    0.056    0.069    8.884   10.239
 hybrid_alltoall_any               5255 16.5    0.847    3.792    8.616   10.051
 cp_fm_cholesky_decompose            22 10.9    8.707    8.854    8.707    8.854
 qs_ot_get_derivative_diag           78 12.4    0.002    0.003    8.757    8.794
 fft_wrap_pw1pw2                   1301 11.7    0.018    0.020    8.389    8.422
 grid_integrate_task_list           129 12.3    7.828    8.227    7.828    8.227
 dbcsr_mm_accdrv_process          20954 16.1    3.175    4.246    7.415    8.127
 wfi_extrapolate                     11  7.9    0.001    0.001    7.574    7.574
 multiply_cannon_metrocomm1       10136 15.6    0.030    0.031    4.832    7.533
 fft_wrap_pw1pw2_140                527 12.2    0.439    0.446    7.264    7.305
 density_rs2pw                      129  9.7    0.005    0.005    6.673    7.006
 calculate_dm_sparse                129  9.5    0.001    0.001    6.382    6.483
 fft3d_ps                          1301 13.7    3.139    3.241    6.348    6.363
 dbcsr_complete_redistribute        397 12.7    2.079    2.144    5.494    5.920
 grid_collocate_task_list           129  9.7    5.610    5.869    5.610    5.869
 mp_alltoall_d11v                  2429 14.1    4.975    5.796    4.975    5.796
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.403    5.411
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    5.364    5.365
 mp_allgather_i34                  2534 14.6    1.236    4.783    1.236    4.783
 multiply_cannon_metrocomm4        7602 15.6    0.025    0.027    1.796    4.553
 mp_irecv_dv                      29142 15.9    1.757    4.474    1.757    4.474
 potential_pw2rs                    129 12.3    0.026    0.027    4.402    4.415
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.281    4.324
 copy_fm_to_dbcsr                   210 11.7    0.002    0.002    3.583    3.945
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.597    3.907
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.748    3.824
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    3.739    3.811
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=183.781000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1480.090909, yerr=6.841777
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022950912       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963542011904       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444706349056       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019182452736       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019182452736       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796564E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.606412E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499488       0.0%      0.0%    100.0%
 number of processed stacks               1947808       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3442.6
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               3.148874E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   99400
 MPI messages size (bytes):
  total size                         1.127422E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.342272E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               44768              34745614336
   4194304 < size <= 16777216               43984             376564613120
  16777216 < size                           10032             716108490000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3991                  59293.
 MP_Allreduce        11055                   1504.
 MP_Sync                86
 MP_Alltoall          1700               36954339.
 MP_SendRecv          1778                 218624.
 MP_ISendRecv         1778                 218624.
 MP_Wait              9728
 MP_ISend             6360                1080477.
 MP_IRecv             6360                1080477.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.068    0.130  287.882  287.883
 qs_mol_dyn_low                       1  2.0    0.003    0.003  287.092  287.129
 qs_forces                           11  3.9    0.009    0.010  286.987  286.989
 qs_energies                         11  4.9    0.002    0.003  278.302  278.306
 scf_env_do_scf                      11  5.9    0.001    0.001  255.381  255.388
 velocity_verlet                     10  3.0    0.018    0.019  207.952  207.960
 scf_env_do_scf_inner_loop          116  6.6    0.012    0.016  130.165  130.167
 init_scf_loop                       11  6.9    0.000    0.000  124.931  124.935
 prepare_preconditioner              11  7.9    0.000    0.000  120.132  120.156
 make_preconditioner                 11  8.9    0.000    0.000  120.132  120.155
 make_full_inverse_cholesky          11  9.9    0.038    0.039   95.663  117.296
 qs_scf_new_mos                     116  7.6    0.001    0.001   88.473   88.567
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   88.472   88.567
 ot_scf_mini                        116  9.6    0.004    0.004   83.704   83.802
 dbcsr_multiply_generic            2485 12.5    0.227    0.238   81.453   82.005
 cp_fm_upper_to_full                104 14.8   53.507   76.973   53.507   76.973
 multiply_cannon                   2485 13.5    0.667    0.708   59.020   59.616
 multiply_cannon_loop              2485 14.5    1.047    1.063   55.069   56.543
 ot_mini                            116 10.6    0.001    0.001   43.998   44.114
 dbcsr_complete_redistribute        393 12.7    4.006    4.049   30.417   43.698
 copy_fm_to_dbcsr                   208 11.6    0.001    0.001   26.970   40.223
 transfer_fm_to_dbcsr                11  9.9    0.030    0.030   24.427   37.571
 mp_alltoall_i22                    712 14.1   22.274   35.330   22.274   35.330
 cp_fm_cholesky_invert               11 10.9   33.116   33.122   33.116   33.122
 rebuild_ks_matrix                  127  8.3    0.001    0.001   33.033   33.095
 qs_ks_build_kohn_sham_matrix       127  9.3    0.019    0.019   33.032   33.094
 mp_waitall_1                    102768 16.8   27.435   31.997   27.435   31.997
 qs_ks_update_qs_env                127  7.6    0.001    0.001   30.712   30.777
 qs_ot_get_p                        127 10.4    0.024    0.033   24.915   25.047
 qs_ot_get_derivative               116 11.6    0.002    0.002   24.633   24.730
 qs_ot_p2m_diag                      82 11.4    0.869    0.874   20.915   20.944
 multiply_cannon_metrocomm3        9940 15.5    0.024    0.025   18.836   20.430
 ot_diis_step                       116 11.6    0.021    0.022   19.337   19.337
 make_m2s                          4970 13.5    0.073    0.075   17.871   19.300
 cp_dbcsr_syevd                      82 12.4    0.006    0.006   19.138   19.140
 make_images                       4970 14.5    3.052    3.233   17.393   18.822
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   18.674   18.772
 apply_single                       127 13.6    0.001    0.001   18.674   18.772
 multiply_cannon_multrec           9940 15.5   10.188   12.015   18.285   18.492
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   16.016   16.017
 cp_fm_diag_elpa_base                82 14.4   11.665   13.245   16.013   16.013
 sum_up_and_integrate               127 10.3    0.002    0.002   15.436   15.527
 qs_rho_update_rho_low              127  7.7    0.001    0.001   15.444   15.523
 calculate_rho_elec                 127  8.7    0.478    0.479   15.443   15.522
 integrate_v_rspace                 127 11.3    0.004    0.005   15.374   15.466
 multiply_cannon_sync_h2d          9940 15.5   14.180   14.208   14.180   14.208
 init_scf_run                        11  5.9    0.000    0.001   12.696   12.697
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   12.696   12.697
 hybrid_alltoall_any               5155 16.4    1.302    3.020   10.188   12.265
 make_images_data                  4970 15.5    0.062    0.067    9.872   11.991
 dbcsr_mm_accdrv_process          20590 16.0    4.453    6.348    7.847    9.710
 qs_ot_get_derivative_diag           76 12.4    0.002    0.003    9.547    9.620
 fft_wrap_pw1pw2                   1281 11.7    0.021    0.021    9.598    9.605
 cp_fm_cholesky_decompose            22 10.9    8.826    8.846    8.826    8.846
 wfi_extrapolate                     11  7.9    0.001    0.001    8.802    8.802
 grid_integrate_task_list           127 12.3    8.496    8.682    8.496    8.682
 fft_wrap_pw1pw2_140                519 12.2    0.818    0.819    8.503    8.517
 qs_energies_init_hamiltonians       11  5.9    0.002    0.003    8.067    8.067
 calculate_dm_sparse                127  9.5    0.001    0.001    6.941    6.987
 fft3d_ps                          1281 13.7    3.954    3.961    6.921    6.953
 mp_alltoall_d11v                  2401 14.1    6.651    6.833    6.651    6.833
 density_rs2pw                      127  9.7    0.005    0.005    6.595    6.637
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.491    6.587
 grid_collocate_task_list           127  9.7    6.363    6.433    6.363    6.433
 copy_dbcsr_to_fm                   185 11.7    0.004    0.004    6.080    6.159
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=287.883000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2804.727273, yerr=173.660113
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.263923E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255646.
 MP_Allreduce         3139                   6114.
 MP_Sync                 4
 MP_Alltoall            54               33980775.
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.043    0.299   86.292   86.300
 qs_energies                          1  2.0    0.000    0.000   85.189   85.198
 ls_scf                               1  3.0    0.000    0.000   84.205   84.214
 dbcsr_multiply_generic             111  6.7    0.014    0.016   72.884   73.029
 multiply_cannon                    111  7.7    0.017    0.020   56.193   57.275
 multiply_cannon_loop               111  8.7    0.227    0.242   52.769   54.034
 ls_scf_main                          1  4.0    0.000    0.000   52.489   52.494
 density_matrix_trs4                  2  5.0    0.002    0.003   46.887   46.985
 ls_scf_init_scf                      1  4.0    0.000    0.000   28.680   28.686
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   27.358   27.405
 mp_waitall_1                     11031 10.9   22.379   25.612   22.379   25.612
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.006   25.150   25.171
 multiply_cannon_multrec           2664  9.7    8.143    8.906   15.543   17.265
 multiply_cannon_sync_h2d          2664  9.7   13.751   15.079   13.751   15.079
 make_m2s                           222  7.7    0.009    0.010   13.123   13.707
 make_images                        222  8.7    0.099    0.108   13.101   13.687
 multiply_cannon_metrocomm1        2664  9.7    0.010    0.011    9.648   12.121
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.397    8.252
 make_images_data                   222  9.7    0.004    0.005    7.672    8.138
 dbcsr_mm_accdrv_process           4760 10.4    0.588    0.709    7.020    7.992
 hybrid_alltoall_any                227 10.6    0.217    1.832    6.618    7.989
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.233    7.133    6.233    7.133
 calculate_norms                   4752  9.8    5.534    6.161    5.534    6.161
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.113    5.243
 mp_sum_l                           887  5.1    3.277    4.865    3.277    4.865
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.294    3.652
 multiply_cannon_metrocomm4        2442  9.7    0.011    0.014    2.036    3.540
 mp_irecv_dv                       6231 10.9    2.019    3.513    2.019    3.513
 arnoldi_extremal                     4  6.8    0.000    0.000    3.298    3.331
 arnoldi_normal_ev                    4  7.8    0.001    0.003    3.298    3.331
 make_images_sizes                  222  9.7    0.000    0.000    0.698    3.314
 mp_alltoall_i44                    222 10.7    0.698    3.314    0.698    3.314
 build_subspace                      16  8.4    0.009    0.012    3.205    3.207
 ls_scf_post                          1  4.0    0.000    0.000    3.036    3.046
 ls_scf_store_result                  1  5.0    0.000    0.000    2.851    2.889
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.397    2.790
 dbcsr_merge_single_wm              555 10.7    0.462    0.592    2.388    2.782
 make_images_pack                   222  9.7    2.210    2.631    2.212    2.632
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.014    2.360    2.585
 dbcsr_sort_data                    658 11.4    2.184    2.515    2.184    2.515
 dbcsr_matrix_vector_mult_local     304 10.0    2.065    2.452    2.067    2.454
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.301    2.379
 buffer_matrices_ensure_size        222  8.7    1.751    2.047    1.751    2.047
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.869    1.875
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.859    1.865
 qs_ks_build_kohn_sham_matrix         3  8.3    0.012    0.030    1.859    1.865
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=86.300000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1128.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.174337E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  10075.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.088    0.218   91.839   91.849
 qs_energies                          1  2.0    0.000    0.004   90.911   90.931
 ls_scf                               1  3.0    0.009    0.035   89.357   89.377
 dbcsr_multiply_generic             111  6.7    0.015    0.016   74.200   74.470
 multiply_cannon                    111  7.7    0.028    0.043   52.917   57.188
 ls_scf_main                          1  4.0    0.003    0.039   54.566   54.593
 multiply_cannon_loop               111  8.7    0.135    0.147   50.077   53.247
 density_matrix_trs4                  2  5.0    0.014    0.098   48.769   48.958
 ls_scf_init_scf                      1  4.0    0.004    0.016   31.080   31.112
 mp_waitall_1                      9105 10.9   20.637   29.825   20.637   29.825
 ls_scf_init_matrix_S                 1  5.0    0.012    0.064   28.964   29.081
 multiply_cannon_multrec           1332  9.7   13.308   17.119   22.612   27.595
 matrix_sqrt_Newton_Schulz            2  6.5    0.005    0.016   26.478   26.505
 multiply_cannon_metrocomm3        1332  9.7    0.006    0.008   11.665   20.964
 make_m2s                           222  7.7    0.006    0.008   14.831   15.428
 make_images                        222  8.7    1.371    1.692   14.800   15.398
 dbcsr_mm_accdrv_process           4041 10.4    0.355    0.598    8.902   10.425
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.407    9.893    8.407    9.893
 make_images_data                   222  9.7    0.004    0.005    8.443    9.339
 hybrid_alltoall_any                227 10.6    0.542    2.552    7.817    9.267
 mp_sum_l                           887  5.1    5.085    8.133    5.085    8.133
 multiply_cannon_metrocomm4        1221  9.7    0.007    0.009    3.195    7.613
 mp_irecv_dv                       3311 11.0    3.174    7.555    3.174    7.555
 calculate_norms                   2376  9.8    6.048    6.702    6.048    6.702
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.704    6.273
 multiply_cannon_sync_h2d          1332  9.7    4.893    5.942    4.893    5.942
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.017    5.202
 arnoldi_extremal                     4  6.8    0.000    0.000    4.866    4.907
 arnoldi_normal_ev                    4  7.8    0.002    0.006    4.866    4.907
 build_subspace                      16  8.4    0.014    0.021    4.573    4.576
 ls_scf_post                          1  4.0    0.004    0.018    3.703    3.723
 ls_scf_store_result                  1  5.0    0.000    0.000    3.419    3.496
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.219    3.450
 dbcsr_matrix_vector_mult_local     304 10.0    2.760    3.250    2.762    3.252
 mp_allgather_i34                   111  8.7    0.829    3.028    0.829    3.028
 ls_scf_dm_to_ks                      2  5.0    0.015    0.101    2.647    2.721
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.162    2.541
 dbcsr_data_new                    4174 10.1    2.112    2.406    2.112    2.406
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.324    2.357
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.311    2.344
 qs_ks_build_kohn_sham_matrix         3  8.3    0.033    0.096    2.311    2.344
 ls_scf_initial_guess                 1  5.0    0.000    0.000    2.112    2.219
 ls_scf_qs_atomic_guess               1  6.0    0.000    0.008    2.112    2.219
 make_images_pack                   222  9.7    1.830    2.117    1.832    2.119
 dbcsr_sort_data                    436 11.2    1.838    2.097    1.838    2.097
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=91.849000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1793.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.960896E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265448.
 MP_Allreduce         3138                  10896.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.063    0.145   93.433   93.441
 qs_energies                          1  2.0    0.002    0.008   92.413   92.468
 ls_scf                               1  3.0    0.006    0.023   90.723   90.727
 dbcsr_multiply_generic             111  6.7    0.017    0.029   74.389   74.737
 multiply_cannon                    111  7.7    0.036    0.070   51.568   56.164
 ls_scf_main                          1  4.0    0.002    0.028   55.936   55.948
 multiply_cannon_loop               111  8.7    0.117    0.139   48.844   52.403
 density_matrix_trs4                  2  5.0    0.010    0.051   49.936   50.140
 mp_waitall_1                      7281 11.0   23.035   32.683   23.035   32.683
 ls_scf_init_scf                      1  4.0    0.003    0.021   31.073   31.080
 ls_scf_init_matrix_S                 1  5.0    0.001    0.003   29.237   29.317
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.002   26.761   26.787
 multiply_cannon_multrec            888  9.7   12.698   15.201   21.299   24.426
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   10.530   22.270
 make_m2s                           222  7.7    0.006    0.008   16.462   17.146
 make_images                        222  8.7    1.590    1.850   16.424   17.105
 make_images_data                   222  9.7    0.004    0.004    9.564   10.555
 hybrid_alltoall_any                227 10.6    0.642    2.944    9.031   10.226
 dbcsr_mm_accdrv_process           3754 10.4    0.328    0.535    8.125    9.399
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.680    8.864    7.680    8.864
 mp_sum_l                           887  5.1    4.696    7.931    4.696    7.931
 multiply_cannon_sync_h2d           888  9.7    6.030    7.388    6.030    7.388
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.584    6.951
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.461    6.855
 mp_irecv_dv                       2335 11.1    2.446    6.816    2.446    6.816
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.524    6.333
 arnoldi_extremal                     4  6.8    0.000    0.000    5.238    5.269
 arnoldi_normal_ev                    4  7.8    0.001    0.006    5.238    5.269
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.743    4.989
 build_subspace                      16  8.4    0.014    0.020    4.846    4.853
 calculate_norms                   1584  9.8    4.411    4.834    4.411    4.834
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.466    3.797
 ls_scf_post                          1  4.0    0.001    0.004    3.707    3.713
 mp_allgather_i34                   111  8.7    0.816    3.707    0.816    3.707
 dbcsr_matrix_vector_mult_local     304 10.0    3.054    3.636    3.056    3.638
 ls_scf_store_result                  1  5.0    0.000    0.000    3.429    3.518
 ls_scf_dm_to_ks                      2  5.0    0.003    0.035    2.867    2.985
 dbcsr_data_new                    4116  9.9    2.112    2.460    2.112    2.460
 make_images_sizes                  222  9.7    0.000    0.000    1.061    2.372
 mp_alltoall_i44                    222 10.7    1.060    2.371    1.060    2.371
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.201    2.208
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.183    2.190
 qs_ks_build_kohn_sham_matrix         3  8.3    0.014    0.026    2.183    2.190
 dbcsr_sort_data                    325 11.1    1.870    2.169    1.870    2.169
 ls_scf_initial_guess                 1  5.0    0.000    0.000    1.832    1.934
 ls_scf_qs_atomic_guess               1  6.0    0.000    0.011    1.832    1.934
 dbcsr_finalize                     304  7.8    0.026    0.032    1.623    1.932
 make_images_pack                   222  9.7    1.627    1.892    1.630    1.895
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=93.441000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2286.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.435770E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  13030.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.031    0.076   96.725   96.726
 qs_energies                          1  2.0    0.000    0.000   95.988   95.992
 ls_scf                               1  3.0    0.000    0.000   94.307   94.313
 dbcsr_multiply_generic             111  6.7    0.016    0.018   77.489   77.728
 ls_scf_main                          1  4.0    0.000    0.000   58.218   58.241
 multiply_cannon                    111  7.7    0.050    0.124   51.030   55.409
 density_matrix_trs4                  2  5.0    0.002    0.003   52.067   52.187
 multiply_cannon_loop               111  8.7    0.151    0.165   45.915   49.383
 ls_scf_init_scf                      1  4.0    0.000    0.000   32.848   32.910
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   31.542   31.614
 matrix_sqrt_Newton_Schulz            2  6.5    0.007    0.017   28.910   28.924
 mp_waitall_1                      6369 11.0   21.660   28.437   21.660   28.437
 multiply_cannon_multrec           1332  9.7   14.159   17.538   22.085   25.061
 make_m2s                           222  7.7    0.006    0.008   20.817   22.223
 make_images                        222  8.7    3.147    3.605   20.767   22.175
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.003    8.380   16.440
 make_images_data                   222  9.7    0.004    0.004   11.539   13.136
 hybrid_alltoall_any                227 10.6    0.800    3.825   11.017   12.743
 dbcsr_mm_accdrv_process           3641 10.4    0.297    0.488    7.567    9.087
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.121    8.590    7.121    8.590
 mp_sum_l                           887  5.1    4.065    7.546    4.065    7.546
 multiply_cannon_metrocomm4        1110  9.7    0.005    0.007    2.117    6.098
 mp_irecv_dv                       3229 10.9    2.092    6.010    2.092    6.010
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.097    5.961
 multiply_cannon_sync_h2d          1332  9.7    5.380    5.823    5.380    5.823
 arnoldi_extremal                     4  6.8    0.000    0.000    5.445    5.483
 arnoldi_normal_ev                    4  7.8    0.021    0.042    5.445    5.482
 build_subspace                      16  8.4    0.014    0.021    5.073    5.080
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.570    5.021
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.532    4.730
 mp_allgather_i34                   111  8.7    2.205    4.653    2.205    4.653
 calculate_norms                   2376  9.8    4.227    4.603    4.227    4.603
 dbcsr_matrix_vector_mult           304  9.0    0.011    0.021    3.710    3.983
 dbcsr_matrix_vector_mult_local     304 10.0    3.242    3.745    3.244    3.747
 dbcsr_sort_data                    658 11.4    3.094    3.367    3.094    3.367
 ls_scf_post                          1  4.0    0.000    0.000    3.240    3.244
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.836    3.134
 dbcsr_merge_single_wm              555 10.7    0.542    0.659    2.828    3.126
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.993    3.046
 ls_scf_store_result                  1  5.0    0.000    0.000    2.939    2.994
 dbcsr_data_release               10477 10.7    1.592    2.479    1.592    2.479
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.039    2.099
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.016    2.076
 qs_ks_build_kohn_sham_matrix         3  8.3    0.041    0.101    2.016    2.076
 dbcsr_finalize                     304  7.8    0.049    0.061    1.813    1.995
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=96.726000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2744.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.798054E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265536.
 MP_Allreduce         3129                  15263.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.038    0.070   93.733   93.734
 qs_energies                          1  2.0    0.000    0.000   92.671   92.673
 ls_scf                               1  3.0    0.000    0.000   90.728   90.733
 dbcsr_multiply_generic             111  6.7    0.017    0.019   71.844   72.019
 ls_scf_main                          1  4.0    0.000    0.000   57.369   57.372
 multiply_cannon                    111  7.7    0.080    0.116   53.223   56.559
 multiply_cannon_loop               111  8.7    0.087    0.094   50.574   52.212
 density_matrix_trs4                  2  5.0    0.002    0.003   50.327   50.425
 ls_scf_init_scf                      1  4.0    0.000    0.000   29.947   29.951
 mp_waitall_1                      5436 11.0   25.011   29.331   25.011   29.331
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.468   28.498
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.336   26.351
 multiply_cannon_multrec            444  9.7   13.757   16.058   20.881   22.559
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   10.938   16.168
 multiply_cannon_metrocomm3         444  9.7    0.001    0.002    6.424   15.267
 make_m2s                           222  7.7    0.005    0.005   13.681   14.636
 make_images                        222  8.7    2.038    2.474   13.614   14.567
 hybrid_alltoall_any                227 10.6    0.800    3.811    8.277    9.941
 make_images_data                   222  9.7    0.003    0.004    8.462    9.811
 multiply_cannon_sync_h2d           444  9.7    6.700    8.338    6.700    8.338
 dbcsr_mm_accdrv_process           3003 10.4    0.377    0.499    6.820    7.965
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.427    7.560    6.427    7.560
 arnoldi_extremal                     4  6.8    0.000    0.000    5.985    5.993
 arnoldi_normal_ev                    4  7.8    0.002    0.005    5.985    5.993
 build_subspace                      16  8.4    0.015    0.020    5.580    5.590
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.518    4.739
 mp_sum_l                           887  5.1    2.951    4.602    2.951    4.602
 dbcsr_matrix_vector_mult           304  9.0    0.011    0.021    4.274    4.456
 dbcsr_matrix_vector_mult_local     304 10.0    3.755    4.201    3.757    4.203
 mp_allgather_i34                   111  8.7    1.207    3.850    1.207    3.850
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.561    3.721
 calculate_norms                    792  9.8    3.601    3.705    3.601    3.705
 mp_irecv_dv                       1241 11.2    1.548    3.668    1.548    3.668
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.510    3.590
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.050    3.586
 ls_scf_post                          1  4.0    0.000    0.000    3.411    3.413
 make_images_sizes                  222  9.7    0.000    0.000    0.834    3.299
 mp_alltoall_i44                    222 10.7    0.834    3.299    0.834    3.299
 ls_scf_store_result                  1  5.0    0.000    0.000    3.185    3.221
 dbcsr_finalize                     304  7.8    0.061    0.077    2.201    2.322
 dbcsr_data_new                    4608  9.7    1.805    2.263    1.805    2.263
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.223    2.228
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.190    2.195
 qs_ks_build_kohn_sham_matrix         3  8.3    0.056    0.064    2.190    2.195
 dbcsr_merge_all                    275  8.9    0.479    0.528    2.058    2.159
 qs_energies_init_hamiltonians        1  3.0    0.002    0.014    1.928    1.928
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=93.734000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3755.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               9.056072E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284089.
 MP_Allreduce         3123                  21388.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.080    0.125  106.397  106.398
 qs_energies                          1  2.0    0.000    0.000  104.625  104.628
 ls_scf                               1  3.0    0.000    0.000  101.659  101.663
 dbcsr_multiply_generic             111  6.7    0.024    0.026   74.812   74.965
 ls_scf_main                          1  4.0    0.000    0.000   63.734   63.734
 density_matrix_trs4                  2  5.0    0.002    0.003   54.897   54.958
 multiply_cannon                    111  7.7    0.113    0.213   48.511   51.035
 multiply_cannon_loop               111  8.7    0.098    0.099   45.432   45.749
 ls_scf_init_scf                      1  4.0    0.000    0.000   34.081   34.082
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   32.401   32.424
 matrix_sqrt_Newton_Schulz            2  6.5    0.013    0.014   29.662   29.667
 mp_waitall_1                      4527 11.1   21.710   25.560   21.710   25.560
 make_m2s                           222  7.7    0.005    0.005   22.604   23.742
 make_images                        222  8.7    3.570    3.867   22.497   23.631
 multiply_cannon_multrec            444  9.7   17.843   18.423   22.482   23.083
 hybrid_alltoall_any                227 10.6    1.652    3.621   12.747   15.748
 make_images_data                   222  9.7    0.003    0.004   12.960   15.186
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.170   10.583
 multiply_cannon_sync_h2d           444  9.7    8.786    8.836    8.786    8.836
 arnoldi_extremal                     4  6.8    0.000    0.000    7.711    7.714
 arnoldi_normal_ev                    4  7.8    0.011    0.017    7.711    7.714
 build_subspace                      16  8.4    0.026    0.036    7.158    7.171
 dbcsr_matrix_vector_mult           304  9.0    0.017    0.034    5.694    5.836
 dbcsr_matrix_vector_mult_local     304 10.0    5.177    5.454    5.179    5.457
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.909    5.163
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    4.952    5.044
 dbcsr_mm_accdrv_process           1814 10.4    0.290    0.354    4.463    4.595
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.126    4.262    4.126    4.262
 ls_scf_post                          1  4.0    0.000    0.000    3.845    3.849
 make_images_sizes                  222  9.7    0.000    0.000    1.517    3.807
 mp_alltoall_i44                    222 10.7    1.516    3.807    1.516    3.807
 mp_allgather_i34                   111  8.7    1.172    3.646    1.172    3.646
 ls_scf_store_result                  1  5.0    0.000    0.000    3.523    3.558
 calculate_norms                    792  9.8    3.233    3.274    3.233    3.274
 dbcsr_finalize                     304  7.8    0.082    0.089    3.073    3.125
 qs_energies_init_hamiltonians        1  3.0    0.021    0.029    2.935    2.935
 dbcsr_merge_all                    275  8.9    0.885    0.913    2.856    2.902
 dbcsr_complete_redistribute          5  7.6    1.438    1.485    2.769    2.883
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.408    2.542
 dbcsr_sort_data                    325 11.1    2.434    2.501    2.434    2.501
 dbcsr_new_transposed                 4  7.5    0.312    0.394    2.425    2.438
 dbcsr_data_new                    6591  9.6    1.885    2.395    1.885    2.395
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.386    2.387
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.317    2.318
 qs_ks_build_kohn_sham_matrix         3  8.3    0.056    0.060    2.317    2.318
 dbcsr_frobenius_norm                74  6.6    2.055    2.127    2.186    2.219
 dbcsr_add_d                        103  6.2    0.000    0.000    2.129    2.204
 dbcsr_add_anytype                  103  7.2    0.858    0.890    2.128    2.204
 dbcsr_data_release               12724 10.6    1.972    2.176    1.972    2.176
 dbcsr_redistribute                   4  8.5    1.367    1.418    2.085    2.158
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=106.398000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=7199.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/41a40305d136800f944133e53e6b8ab400f7c868_performance_tests/27/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32        7009386627072       0.0%      0.0%    100.0%
 flops     9 x     9 x    32        7335108845568       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        9866241589248       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        9884108906496       0.0%      0.0%    100.0%
 flops    22 x    22 x    32       13354440523776       0.0%      0.0%    100.0%
 flops    32 x    32 x     9       20607185977344       0.0%      0.0%    100.0%
 flops    32 x    32 x    22       25186560638976       0.0%      0.0%    100.0%
 flops     9 x    32 x    32       28458319085568       0.0%      0.0%    100.0%
 flops    22 x    32 x    32       34782389993472       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       42881542373376       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       55680402235392       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       55680402235392       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       72328573419520       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       383.054662E+12       0.0%      0.0%    100.0%
 flops max/rank                    733.641090E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                        26899403712       0.0%      0.0%    100.0%
 number of processed stacks             118860288       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     226.3
 marketing flops                   780.439111E+12
 -------------------------------------------------------------------------------
 # multiplications                           1445
 max memory usage/rank             583.921664E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged               102097920
 MPI messages size (bytes):
  total size                        37.227590E+12
  min size                           0.000000E+00
  max size                           4.551360E+06
  average size                     364.626312E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              731472                        0
       128 < size <=     8192            11922720              97670922240
      8192 < size <=    32768            24718992             614677610496
     32768 < size <=   131072            20000256            1970081366016
    131072 < size <=  4194304            42515668           24886801223040
   4194304 < size <= 16777216             2208812            9656099886720
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4640                  78072.
 MP_Allreduce        13232                   2081.
 MP_Sync              1064
 MP_Alltoall          2588                3686719.
 MP_SendRecv        168740                  11136.
 MP_ISendRecv        92040                  11136.
 MP_Wait            102830
 MP_comm_split          40
 MP_ISend            26090                  85106.
 MP_IRecv            37890                  59644.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.132    0.298  273.571  273.574
 qs_mol_dyn_low                       1  2.0    0.011    0.061  271.973  272.004
 qs_forces                            5  3.8    0.005    0.007  271.688  271.699
 qs_energies                          5  4.8    0.011    0.065  268.404  268.451
 scf_env_do_scf                       5  5.8    0.001    0.020  238.990  238.993
 scf_env_do_scf_inner_loop          105  6.6    0.009    0.039  207.806  207.809
 qs_scf_new_mos                     105  7.6    0.000    0.001  162.268  162.493
 qs_scf_loop_do_ot                  105  8.6    0.001    0.009  162.267  162.493
 ot_scf_mini                        105  9.6    0.003    0.005  152.241  152.355
 dbcsr_multiply_generic            1445 12.2    0.145    0.157  149.756  150.343
 multiply_cannon                   1445 13.2    0.276    0.289  128.818  130.617
 multiply_cannon_loop              1445 14.2    2.848    2.999  126.678  128.543
 velocity_verlet                      4  3.0    0.044    0.187  115.356  115.360
 ot_mini                            105 10.6    0.001    0.001   62.316   62.445
 qs_ot_get_p                        112 10.4    0.001    0.001   59.385   59.743
 qs_ot_p2m_diag                      40 11.0    0.020    0.030   47.449   47.543
 mp_waitall_1                    488190 16.1   38.473   47.378   38.473   47.378
 multiply_cannon_multrec          69360 15.2   29.683   34.236   40.454   45.228
 cp_dbcsr_syevd                      40 12.0    0.002    0.002   44.044   44.045
 qs_ot_get_derivative                55 11.6    0.001    0.001   40.450   40.612
 cp_fm_syevd                         40 13.0    0.000    0.001   38.409   38.547
 multiply_cannon_metrocomm3       69360 15.2    0.199    0.210   26.755   36.252
 multiply_cannon_sync_h2d         69360 15.2   28.869   33.337   28.869   33.337
 cp_fm_redistribute_end              40 14.0   16.622   33.163   16.628   33.167
 cp_fm_syevd_base                    40 14.0   16.524   33.065   16.524   33.065
 rebuild_ks_matrix                  110  8.4    0.000    0.000   30.971   31.241
 qs_ks_build_kohn_sham_matrix       110  9.4    0.052    0.230   30.971   31.241
 init_scf_loop                        7  6.6    0.005    0.088   31.131   31.135
 qs_ks_update_qs_env                112  7.6    0.001    0.001   28.452   28.692
 init_scf_run                         5  5.8    0.000    0.001   26.478   26.479
 scf_env_initial_rho_setup            5  6.8    0.001    0.003   26.478   26.479
 prepare_preconditioner               7  7.6    0.000    0.000   25.779   25.844
 make_preconditioner                  7  8.6    0.004    0.107   25.779   25.844
 apply_preconditioner_dbcsr          62 12.6    0.000    0.000   23.098   23.425
 apply_single                        62 13.6    0.000    0.000   23.098   23.425
 qs_rho_update_rho_low              110  7.6    0.000    0.001   21.124   21.530
 calculate_rho_elec                 110  8.6    0.030    0.057   21.123   21.529
 calculate_first_density_matrix       1  7.0    0.003    0.063   21.096   21.132
 ot_new_cg_direction                 55 11.6    0.001    0.001   21.115   21.116
 acc_transpose_blocks             69360 15.2    0.340    0.358   19.384   20.372
 cp_dbcsr_sm_fm_multiply             15  9.3    0.001    0.001   19.945   19.962
 cp_dbcsr_sm_fm_multiply_core        15 10.3    0.000    0.000   19.393   19.444
 acc_transpose_blocks_kernels     69360 16.2    0.845    0.889   16.642   17.541
 make_full_inverse_cholesky           7  9.6    0.000    0.000   17.148   17.213
 jit_kernel_transpose                 5 15.0   15.797   16.684   15.797   16.684
 density_rs2pw                      110  9.6    0.004    0.005   15.081   15.520
 qs_ot_get_orbitals                 105 10.6    0.001    0.001   15.227   15.467
 qs_ot_get_derivative_taylor         37 12.8    0.001    0.001   14.538   14.634
 mp_sum_l                          4764 12.2   12.945   13.873   12.945   13.873
 multiply_cannon_metrocomm1       69360 15.2    0.094    0.101    5.275   12.542
 calculate_dm_sparse                110  9.5    0.000    0.001   12.094   12.332
 transfer_rs2pw                     445 10.6    0.008    0.009   11.842   12.237
 cp_fm_cholesky_invert                7 10.6   11.672   11.688   11.672   11.688
 dbcsr_mm_accdrv_process         154766 15.8    6.231    6.399   10.640   11.490
 fft_wrap_pw1pw2                   1425 12.5    0.020    0.029   11.073   11.404
 qs_ot_get_derivative_diag           18 12.0    0.000    0.001   10.893   10.953
 sum_up_and_integrate                60 10.3    0.002    0.041   10.537   10.550
 integrate_v_rspace                  60 11.3    0.002    0.002   10.518   10.532
 check_diag                          80 13.5    8.685    8.935   10.341   10.478
 qs_vxc_create                      110 10.4    0.002    0.002   10.027   10.054
 fft_wrap_pw1pw2_240                915 14.0    0.208    0.236    9.573    9.842
 fft3d_pb                           915 15.0    2.277    2.533    8.512    8.815
 make_full_single_inverse             7  9.6    0.003    0.026    8.320    8.388
 make_m2s                          2890 13.2    0.077    0.085    7.588    8.112
 make_images                       2890 14.2    0.239    0.259    7.481    8.004
 xc_rho_set_and_dset_create         110 12.4    0.077    0.092    7.048    7.317
 xc_vxc_pw_create                    60 11.3    0.036    0.041    6.738    6.770
 potential_pw2rs                     60 12.3    0.002    0.003    6.587    6.624
 transfer_rs2pw_30                  110 11.6    1.254    1.314    5.708    6.190
 mp_sendrecv_dv                  168740 12.6    5.884    6.031    5.884    6.031
 mp_alltoall_z22v                  2340 16.7    5.655    5.961    5.655    5.961
 xc_pw_derive                       510 13.4    0.005    0.006    5.816    5.904
 transfer_pw2rs                     245 13.2    0.003    0.004    5.495    5.526
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="601", plot="h2o_512_md", label="(64n/12r/1t)", y=273.574000, yerr=0.000000
PlotPoint: name="602", plot="h2o_512_md_mem", label="(64n/12r/1t)", y=556.200000, yerr=1.600000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: 41a40305d136800f944133e53e6b8ab400f7c868
Summary: empty
Status: OK