=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: 1aa3ad7d27726d25ce9c9edce445974c62615587


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1,
#              Cray-FFTW 3.3.8.10, COSMA 2.6.6, ELPA 2023.05.001,
#              HDF5 1.14.2, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.2.2, LIBVORI 220621, LIBXSMM 1.17,
#              PLUMED 2.9.0, SPGLIB 1.16.2, LIBGRPP 20231215
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed by default.
#        Replace or adapt the "module add" commands below if needed.
#        An optional first argument selects the gcc module version
#        (9.3.0 is used when it is omitted).
#
# Last update: 25.01.2024
#
# How the shell section below works:
#  - The file is read both by bash (when sourced) and by make (when included).
#    The line holding only a hash and a trailing backslash starts a comment
#    that make continues over every following backslash-terminated line,
#    while bash ends that comment at the newline and executes the rest as one
#    long list of commands separated by semicolons. For that reason no comment
#    line may be inserted between the continued lines.
#  - The script refuses to run unless it is sourced: it compares ${0} with
#    ${BASH_SOURCE} and exits with an error when they are equal.
#  - It then loads the modules, saves them as the module collection
#    cp2k_gpu_gnu_psmp, runs tools/toolchain/install_cp2k_toolchain.sh,
#    sources tools/toolchain/install/setup, prints the make command lines and
#    finally returns, so bash never reaches the make section.
#  - NOTE(review): ${maxtasks} is not assigned anywhere in this file;
#    presumably it is expected from the caller's environment and the installer
#    copes with a bare -j when it is empty - confirm.
#
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   cd tools/toolchain; \
   ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j${maxtasks} --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed --with-sirius=no; \
   cd ../..; \
   printf "Sourcing ${PWD}/tools/toolchain/install/setup ... "; \
   source ${PWD}/tools/toolchain/install/setup; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   echo; \
   return

# Set options
#
# DO_CHECKS=yes adds -D__CHECK_DIAG and USE_ACC=yes adds the DBCSR/CUDA
# offload defines (see the conditionals further down).
# Each USE_<LIB> value is the version string that is spliced into the
# library's directory name below the toolchain install path; leaving a
# USE_<LIB> variable empty or commented out skips that library entirely.
# Comments must stay on their own lines here: text following a value on the
# same line would leave trailing blanks in that value.
DO_CHECKS      := no
USE_ACC        := yes
USE_COSMA      := 2.6.6
USE_ELPA       := 2023.05.001
USE_HDF5       := 1.14.2
USE_LIBGRPP    := 20231215
USE_LIBINT     := 2.6.0
USE_LIBPEXSI   := 1.2.0
USE_LIBVORI    := 220621
USE_LIBXC      := 6.2.2
USE_LIBXSMM    := 1.17
USE_PLUMED     := 2.9.0
#USE_QUIP       := 0.9.10
#USE_DEEPMD     := 2.2.7
#USE_SIRIUS     := 7.5.2
USE_SPGLIB     := 1.16.2
# Only needed for SIRIUS
# (uncomment these three together with USE_SIRIUS; the SIRIUS section builds
# its libvdwxc, SpFFT and SpLA paths from them)
#LIBVDWXC_VER   := 0.4.0
#SPFFT_VER      := 1.0.6
#SPLA_VER       := 1.5.5
# Only needed for LIBPEXSI
# (versions of the scotch and superlu_dist directories used by that section)
SCOTCH_VER     := 6.0.0
SUPERLU_VER    := 6.1.0

# LMAX selects the libint build directory (...-lmax-$(LMAX)); MAX_CONTR is
# passed on to the preprocessor as -D__MAX_CONTR.
LMAX           := 5
MAX_CONTR      := 4

# NOTE(review): GPUVER and OFFLOAD_TARGET are not referenced again in this
# arch file; presumably the including Makefile reads them - confirm.
GPUVER         := P100
OFFLOAD_TARGET := cuda

# Compiler drivers, linker and archiver
CC := cc
CXX := CC
FC := ftn
LD := ftn
OFFLOAD_CC := nvcc
AR := ar -r

# No -march option is given here: cc, CC and ftn already add the proper one
CFLAGS := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g

# Preprocessor defines that are always active
DFLAGS := -D__parallel -D__SCALAPACK -D__FFTW3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# Root of the toolchain installation that all library paths are built from.
# NOTE(review): this uses the PWD environment variable, not make's CURDIR;
# presumably intentional - confirm before changing it.
INSTALL_PATH := $(PWD)/tools/toolchain/install

# DO_CHECKS=yes adds the __CHECK_DIAG define
ifeq ($(DO_CHECKS), yes)
   DFLAGS += -D__CHECK_DIAG
endif

# USE_ACC=yes adds the DBCSR accelerator and CUDA offload defines.
# __NO_OFFLOAD_PW is set as well because there is possibly no performance
# gain with PW_CUDA currently.
ifeq ($(USE_ACC), yes)
   DFLAGS += -D__DBCSR_ACC -D__OFFLOAD_CUDA -D__NO_OFFLOAD_PW
endif

# PLUMED: adds the __PLUMED2 define and the static libplumed archive.
# Enabling it also sets USE_GSL to 2.7, which activates the GSL section.
ifneq ($(USE_PLUMED),)
   USE_PLUMED := $(strip $(USE_PLUMED))
   USE_GSL := 2.7
   DFLAGS += -D__PLUMED2
   PLUMED_LIB := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   LIBS += $(PLUMED_LIB)/libplumed.a
endif

# ELPA: include and library directories live below a per-target
# sub-directory; the nvidia target additionally sets __ELPA_NVIDIA_GPU.
ifneq ($(USE_ELPA),)
   USE_ELPA := $(strip $(USE_ELPA))
   TARGET := nvidia
   ELPA_LIB := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   ELPA_INC := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   DFLAGS += -D__ELPA
   ifeq ($(TARGET), nvidia)
      DFLAGS += -D__ELPA_NVIDIA_GPU
   endif
   CFLAGS += -I$(ELPA_INC)/elpa
   CFLAGS += -I$(ELPA_INC)/modules
   LIBS += $(ELPA_LIB)/libelpa.a
endif

# QUIP: adds the __QUIP define, its include directory and the static
# archives found in its lib directory (link order is kept as listed).
ifneq ($(USE_QUIP),)
   USE_QUIP := $(strip $(USE_QUIP))
   QUIP_INC := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   QUIP_LIB := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   DFLAGS += -D__QUIP
   CFLAGS += -I$(QUIP_INC)
   LIBS += $(addprefix $(QUIP_LIB)/,\
              libquip_core.a libatoms.a \
              libFoX_sax.a libFoX_common.a libFoX_utils.a libFoX_fsys.a)
endif

# DeePMD: adds the __DEEPMD define, the include directory and the shared
# libraries (DeePMD and TensorFlow) from the libdeepmd_c install directory.
ifneq ($(USE_DEEPMD),)
   USE_DEEPMD := $(strip $(USE_DEEPMD))
   DEEPMD_INC := $(INSTALL_PATH)/libdeepmd_c-$(USE_DEEPMD)/include
   DEEPMD_LIB := $(INSTALL_PATH)/libdeepmd_c-$(USE_DEEPMD)/lib
   DFLAGS += -D__DEEPMD
   CFLAGS += -I$(DEEPMD_INC)
   LIBS += $(addprefix $(DEEPMD_LIB)/,\
              libdeepmd.so libdeepmd_c.so libdeepmd_cc.so \
              libdeepmd_dyn_cudart.so libdeepmd_op.so libdeepmd_op_cuda.so \
              libtensorflow_cc.so.2 libtensorflow_framework.so.2)
endif

# PEXSI together with the SuperLU_DIST and SCOTCH installations it is linked
# against here; SCOTCH_VER and SUPERLU_VER select their directories.
ifneq ($(USE_LIBPEXSI),)
   USE_LIBPEXSI := $(strip $(USE_LIBPEXSI))
   SUPERLU_VER := $(strip $(SUPERLU_VER))
   SCOTCH_VER := $(strip $(SCOTCH_VER))

   LIBPEXSI_INC := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   SCOTCH_INC := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   SUPERLU_INC := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include

   LIBPEXSI_LIB := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
   SCOTCH_LIB := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib
   SUPERLU_LIB := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib

   DFLAGS += -D__LIBPEXSI
   CFLAGS += -I$(LIBPEXSI_INC)
   CFLAGS += -I$(SCOTCH_INC)
   CFLAGS += -I$(SUPERLU_INC)

   # Link order is kept: pexsi, superlu_dist, then the scotch archives
   LIBS += $(LIBPEXSI_LIB)/libpexsi.a $(SUPERLU_LIB)/libsuperlu_dist.a
   LIBS += $(addprefix $(SCOTCH_LIB)/,\
              libptscotchparmetis.a libptscotch.a libptscotcherr.a \
              libscotchmetis.a libscotch.a)
endif

# libvori: adds the __LIBVORI define and the static archive (no include path)
ifneq ($(USE_LIBVORI),)
   USE_LIBVORI := $(strip $(USE_LIBVORI))
   DFLAGS += -D__LIBVORI
   LIBVORI_LIB := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   LIBS += $(LIBVORI_LIB)/libvori.a
endif

# libxc: adds the __LIBXC define, the include directory and the two static
# archives, libxcf03.a ahead of libxc.a.
ifneq ($(USE_LIBXC),)
   USE_LIBXC := $(strip $(USE_LIBXC))
   LIBXC_LIB := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   LIBXC_INC := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   DFLAGS += -D__LIBXC
   CFLAGS += -I$(LIBXC_INC)
   LIBS += $(addprefix $(LIBXC_LIB)/,libxcf03.a libxc.a)
endif

# libgrpp: installed below libgrpp-main-<version>; adds the __LIBGRPP define,
# the include directory and the static archive.
ifneq ($(USE_LIBGRPP),)
   USE_LIBGRPP := $(strip $(USE_LIBGRPP))
   LIBGRPP_LIB := $(INSTALL_PATH)/libgrpp-main-$(USE_LIBGRPP)/lib
   LIBGRPP_INC := $(INSTALL_PATH)/libgrpp-main-$(USE_LIBGRPP)/include
   DFLAGS += -D__LIBGRPP
   CFLAGS += -I$(LIBGRPP_INC)
   LIBS += $(LIBGRPP_LIB)/liblibgrpp.a
endif

# libint: the install directory name carries both the version and LMAX
ifneq ($(USE_LIBINT),)
   LMAX := $(strip $(LMAX))
   USE_LIBINT := $(strip $(USE_LIBINT))
   LIBINT_LIB := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   LIBINT_INC := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   DFLAGS += -D__LIBINT
   CFLAGS += -I$(LIBINT_INC)
   LIBS += $(LIBINT_LIB)/libint2.a
endif

# spglib: adds the __SPGLIB define, the include directory and libsymspg.a
ifneq ($(USE_SPGLIB),)
   USE_SPGLIB := $(strip $(USE_SPGLIB))
   SPGLIB_LIB := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   SPGLIB_INC := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   DFLAGS += -D__SPGLIB
   CFLAGS += -I$(SPGLIB_INC)
   LIBS += $(SPGLIB_LIB)/libsymspg.a
endif

# libxsmm: adds the __LIBXSMM define, the include directory and the two
# static archives, libxsmmf.a ahead of libxsmm.a.
ifneq ($(USE_LIBXSMM),)
   USE_LIBXSMM := $(strip $(USE_LIBXSMM))
   LIBXSMM_LIB := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   LIBXSMM_INC := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   DFLAGS += -D__LIBXSMM
   CFLAGS += -I$(LIBXSMM_INC)
   LIBS += $(addprefix $(LIBXSMM_LIB)/,libxsmmf.a libxsmm.a)
endif

# SIRIUS and the libraries it is linked with here (libvdwxc, SpFFT, SpLA).
# The directories of those three libraries are selected by LIBVDWXC_VER,
# SPFFT_VER and SPLA_VER. With USE_ACC=yes the cuda sub-directories of SpFFT,
# SpLA and SIRIUS are used and __OFFLOAD_GEMM is defined.
ifneq ($(USE_SIRIUS),)
   USE_SIRIUS     := $(strip $(USE_SIRIUS))
   LIBVDWXC_VER   := $(strip $(LIBVDWXC_VER))
   SPFFT_VER      := $(strip $(SPFFT_VER))
   SPLA_VER       := $(strip $(SPLA_VER))
   # Stop early with a clear message when a companion version is missing;
   # otherwise paths such as .../libvdwxc-/lib would be generated silently
   # and the build would only fail much later.
   ifeq ($(LIBVDWXC_VER),)
      $(error USE_SIRIUS is set but LIBVDWXC_VER is empty)
   endif
   ifeq ($(SPFFT_VER),)
      $(error USE_SIRIUS is set but SPFFT_VER is empty)
   endif
   ifeq ($(SPLA_VER),)
      $(error USE_SIRIUS is set but SPLA_VER is empty)
   endif
   LIBVDWXC_INC   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SPFFT_INC      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
   SPLA_INC       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla
   ifeq ($(USE_ACC), yes)
      DFLAGS         += -D__OFFLOAD_GEMM
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib/cuda
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib/cuda
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/cuda
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib/cuda
   else
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
   endif
   CFLAGS         += -I$(LIBVDWXC_INC)
   CFLAGS         += -I$(SPFFT_INC)
   CFLAGS         += -I$(SPLA_INC)
   CFLAGS         += -I$(SIRIUS_INC)
   DFLAGS         += -D__LIBVDWXC
   DFLAGS         += -D__SPFFT
   DFLAGS         += -D__SPLA
   DFLAGS         += -D__SIRIUS
   # Link order is kept: sirius first, then spla, spfft and libvdwxc
   LIBS           += $(SIRIUS_LIB)/libsirius.a
   LIBS           += $(SPLA_LIB)/libspla.a
   LIBS           += $(SPFFT_LIB)/libspfft.a
   LIBS           += $(LIBVDWXC_LIB)/libvdwxc.a
endif

# HDF5: adds the __HDF5 define, the include directory and the static
# archives in the order fortran, hl, core.
ifneq ($(USE_HDF5),)
   USE_HDF5 := $(strip $(USE_HDF5))
   HDF5_LIB := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/lib
   HDF5_INC := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/include
   DFLAGS += -D__HDF5
   CFLAGS += -I$(HDF5_INC)
   LIBS += $(addprefix $(HDF5_LIB)/,libhdf5_fortran.a libhdf5_hl.a libhdf5.a)
endif

# COSMA: with USE_ACC=yes the version string gets a -cuda suffix, so the
# COSMA-<version>-cuda installation directory is used.
ifneq ($(USE_COSMA),)
   USE_COSMA := $(strip $(USE_COSMA))
   ifeq ($(USE_ACC), yes)
      USE_COSMA := $(USE_COSMA)-cuda
   endif
   COSMA_LIB := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   COSMA_INC := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   DFLAGS += -D__COSMA
   CFLAGS += -I$(COSMA_INC)
   LIBS += $(addprefix $(COSMA_LIB)/,\
              libcosma_prefixed_pxgemm.a libcosma.a libcosta.a libTiled-MM.a)
endif

# GSL: USE_GSL is set by the PLUMED section; adds the __GSL define, the
# include directory and libgsl.a.
ifneq ($(USE_GSL),)
   USE_GSL := $(strip $(USE_GSL))
   GSL_LIB := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   GSL_INC := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   DFLAGS += -D__GSL
   CFLAGS += -I$(GSL_INC)
   LIBS += $(GSL_LIB)/libgsl.a
endif

# Fold the preprocessor defines into the C flags. This has to come after all
# library sections: the variables here are expanded immediately, so a define
# or include path added later would not reach the flags assembled below.
CFLAGS         += $(DFLAGS)

# C++ flags: the complete C flags plus the C++11 standard
CXXFLAGS       := $(CFLAGS) -std=c++11

# Offload compiler flags: only the defines are taken over, not the C flags.
# NOTE(review): -arch sm_60 is hard-coded here; presumably it has to match
# GPUVER (P100) - confirm when changing the GPU target.
OFFLOAD_FLAGS  := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# Fortran flags: start from the complete C flags (defines and include paths)
FCFLAGS        := $(CFLAGS)
# Add -fallow-argument-mismatch only for GCC major versions above 9 (older
# releases are not given the option). The major version is probed with a
# single shell call; it is quoted and defaults to 0, so a missing gcc or an
# unexpected version string simply leaves the flag out instead of producing
# a shell syntax error.
ifeq ($(shell v=$$(gcc -dumpversion 2>/dev/null | cut -d. -f1); [ "$${v:-0}" -gt 9 ] 2>/dev/null && echo yes), yes)
   FCFLAGS        += -fallow-argument-mismatch
endif
FCFLAGS        += -fbacktrace
FCFLAGS        += -ffree-form
FCFLAGS        += -ffree-line-length-none
FCFLAGS        += -fno-omit-frame-pointer
FCFLAGS        += -std=f2008

# Linker flags: the Fortran flags, plus the CUDA library directory as search
# path and rpath whenever CUDA_HOME is set.
LDFLAGS := $(FCFLAGS)
ifneq ($(CUDA_HOME),)
   CUDA_LIB := $(CUDA_HOME)/lib64
   LDFLAGS += -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA and system libraries are appended after all toolchain archives
LIBS += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt \
        -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/01
 job id: 51339391
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/02
 job id: 51339392
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/03
 job id: 51339393
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/04
 job id: 51339394
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/05
 job id: 51339396
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/06
 job id: 51339397
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/07
 job id: 51339398
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/08
 job id: 51339399
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/09
 job id: 51339400
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/10
 job id: 51339401
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/11
 job id: 51339402
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/12
 job id: 51339403
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/13
 job id: 51339404
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/14
 job id: 51339405
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/15
 job id: 51339406
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/16
 job id: 51339407
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/17
 job id: 51339408
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/18
 job id: 51339410
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/19
 job id: 51339411
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/20
 job id: 51339412
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/21
 job id: 51339413
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/22
 job id: 51339414
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/23
 job id: 51339415
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/24
 job id: 51339416
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/25
 job id: 51339417
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/26
 job id: 51339418
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: 512 H2O (4 NVE MD steps on 64 nodes)
 input file: benchmarks/QS/00512_H2O/H2O-512_md.inp
 required files: []
 output file: result.log
 # nodes = 64
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/27
 job id: 51339419
 --- Point ---
 name: 601
 plot: h2o_512_md
 regex: CP2K  
 label: (64n/12r/1t)
 --- Point ---
 name: 602
 plot: h2o_512_md_mem
 regex: Estimated peak process memory 
 label: (64n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md_mem", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 172669.
 MP_Allreduce          424                      8.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.033  133.701  133.701
 farming_run                          1  2.0  133.229  133.240  133.673  133.676
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.480593E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              230                1134128.
 MP_Allreduce          571                1938539.
 MP_Sync                25
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split          10
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.005    0.024  115.044  115.044
 qs_energies                          1  2.0    0.001    0.001  114.860  114.862
 mp2_main                             1  3.0    0.000    0.000  112.795  112.797
 mp2_gpw_main                         1  4.0    0.027    0.033  111.796  111.798
 mp2_ri_gpw_compute_in                1  5.0    0.173    0.174   92.856   93.153
 mp2_ri_gpw_compute_in_loop           1  6.0    0.003    0.004   54.884   55.182
 mp2_eri_3c_integrate_gpw           272  7.0    0.152    0.166   41.308   46.385
 get_2c_integrals                     1  6.0    0.008    0.009   37.167   37.798
 integrate_v_rspace                 273  8.0    0.442    0.455   24.767   29.637
 pw_transfer                       6555 10.6    0.385    0.404   27.002   27.508
 fft_wrap_pw1pw2                   5465 11.4    0.045    0.047   25.634   26.168
 grid_integrate_task_list           273  9.0   20.633   26.008   20.633   26.008
 fft_wrap_pw1pw2_100               2178 12.4    0.028    0.029   23.174   23.677
 compute_2c_integrals                 1  7.0    0.002    0.002   19.323   19.325
 rpa_ri_compute_en                    1  5.0    0.019    0.020   18.836   19.143
 compute_2c_integrals_loop_lm         1  8.0    0.002    0.003   18.656   19.046
 mp2_eri_2c_integrate_gpw             1  9.0    2.391    2.433   18.655   19.045
 cp_fm_cholesky_decompose            12  8.2   17.786   18.417   17.786   18.417
 cholesky_decomp                      1  7.0    0.000    0.000   16.688   17.315
 fft3d_s                           5443 13.4   16.217   16.613   16.239   16.632
 ao_to_mo_and_store_B_mult_1        272  7.0   10.757   15.320   10.757   15.320
 calculate_wavefunction             272  8.0    5.453    5.596   12.442   13.124
 rpa_num_int                          1  6.0    0.000    0.004   10.662   10.662
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.646   10.660
 calc_mat_Q                           8  8.0    0.000    0.000    9.500    9.591
 contract_S_to_Q                      8  9.0    0.000    0.000    8.924    9.015
 calc_potential_gpw                 544  9.5    0.004    0.005    8.244    8.807
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.506    8.618
 parallel_gemm_fm_cosma              14 10.1    8.506    8.618    8.506    8.618
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.001    8.111    8.425
 potential_pw2rs                    545 10.0    0.107    0.110    7.552    8.156
 create_integ_mat                     1  6.0    0.024    0.028    7.783    7.784
 collocate_single_gaussian          272 10.0    0.039    0.041    7.348    7.601
 array2fm                             1  7.0    0.000    0.000    6.662    7.236
 pw_scatter_s                      2720 13.7    4.356    4.522    4.356    4.522
 pw_gather_s                       2722 13.2    3.479    3.676    3.479    3.676
 array2fm_buffer_send                 1  8.0    2.997    3.162    2.997    3.162
 pw_poisson_solve                   545 10.5    1.115    1.194    2.183    2.466
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=111.795345, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2799.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 200775.
 MP_Allreduce          424                      9.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.025    0.035  521.857  521.859
 farming_run                          1  2.0  520.993  520.997  521.813  521.816
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827141120       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788822       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.238565E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              757                 478553.
 MP_Allreduce         2021                  21391.
 MP_Sync                37
 MP_Alltoall            77
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.033  319.092  319.093
 qs_energies                          1  2.0    0.003    0.024  318.848  318.885
 mp2_main                             1  3.0    0.007    0.057  203.419  203.434
 mp2_gpw_main                         1  4.0    0.053    0.123  202.524  202.532
 mp2_ri_gpw_compute_en                1  5.0    0.045    0.088  128.405  129.918
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.801    1.965  126.776  126.782
 scf_env_do_scf                       1  3.0    0.000    0.000  115.140  115.140
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  114.241  114.248
 rebuild_ks_matrix                    4  6.0    0.000    0.000  114.240  114.247
 qs_ks_build_kohn_sham_matrix         4  7.0    0.054    0.061  114.240  114.247
 hfx_ks_matrix                        4  8.0    0.001    0.001  113.886  113.890
 integrate_four_center                4  9.0    0.152    0.466  113.885  113.889
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.469    0.501  111.321  111.987
 local_gemm                         172  8.0  110.852  111.506  110.852  111.506
 integrate_four_center_main           4 10.0    0.110    0.659  101.819  105.579
 integrate_four_center_bin          264 11.0  101.709  105.573  101.709  105.573
 init_scf_loop                        1  4.0    0.000    0.000   97.197   97.197
 mp2_ri_gpw_compute_in                1  5.0    0.073    0.142   73.955   74.915
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.002   53.777   54.734
 mp2_eri_3c_integrate_gpw            91  7.0    0.145    0.166   41.597   46.400
 integrate_v_rspace                  95  8.0    0.398    0.567   28.066   32.724
 pw_transfer                       2240 10.6    0.148    0.168   29.471   29.952
 ao_to_mo_and_store_B_mult_1         91  7.0   10.512   29.096   10.512   29.096
 fft_wrap_pw1pw2                   1868 11.4    0.017    0.020   28.486   28.982
 grid_integrate_task_list            95  9.0   23.480   28.340   23.480   28.340
 fft_wrap_pw1pw2_100                730 12.4    0.012    0.013   26.241   26.702
 get_2c_integrals                     1  6.0    0.002    0.017   20.090   20.115
 compute_2c_integrals                 1  7.0    0.004    0.014   19.064   19.082
 compute_2c_integrals_loop_lm         1  8.0    0.002    0.011   18.580   18.915
 mp2_eri_2c_integrate_gpw             1  9.0    1.742    1.848   18.578   18.915
 fft3d_s                           1823 13.4   18.479   18.850   18.493   18.863
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.000   17.942   17.942
 calculate_wavefunction              91  8.0    2.024    2.057    9.628    9.820
 mp2_ri_gpw_compute_en_comm          22  7.0    0.500    0.522    7.562    8.996
 potential_pw2rs                    186 10.0    0.033    0.034    8.410    8.973
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.013    8.130    8.532
 calc_potential_gpw                 182  9.5    0.002    0.002    7.895    8.127
 collocate_single_gaussian           91 10.0    0.017    0.031    7.730    7.975
 mp_sync                             37 10.5    4.031    7.672    4.031    7.672
 mp_sendrecv_dm3                   2068  8.0    5.594    7.071    5.594    7.071
 integrate_four_center_load           4 10.0    0.000    0.000    6.756    6.760
 hfx_load_balance                     1 11.0    0.000    0.000    6.756    6.760
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=202.522096, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1496.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             444.661760E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62379.
 MP_Allreduce        10329                    270.
 MP_Sync               530
 MP_Alltoall          2083                 592243.
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.027   53.354   53.355
 qs_mol_dyn_low                       1  2.0    0.004    0.008   53.109   53.116
 qs_forces                           11  3.9    0.003    0.003   53.046   53.048
 qs_energies                         11  4.9    0.002    0.003   51.572   51.579
 scf_env_do_scf                      11  5.9    0.000    0.001   45.515   45.516
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   43.457   43.457
 qs_scf_new_mos                     108  7.5    0.000    0.000   33.593   33.876
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   33.593   33.876
 dbcsr_multiply_generic            2286 12.5    0.094    0.097   33.457   33.858
 ot_scf_mini                        108  9.5    0.002    0.002   31.929   32.120
 multiply_cannon                   2286 13.5    0.185    0.190   26.213   27.676
 multiply_cannon_loop              2286 14.5    1.828    1.929   25.571   27.058
 velocity_verlet                     10  3.0    0.002    0.005   25.875   25.876
 ot_mini                            108 10.5    0.001    0.001   19.337   19.599
 qs_ot_get_derivative               108 11.5    0.001    0.001   16.396   16.576
 mp_waitall_1                    245248 16.5    8.241   14.442    8.241   14.442
 multiply_cannon_metrocomm3       54864 15.5    0.074    0.079    5.976   12.529
 multiply_cannon_multrec          54864 15.5    3.656    5.725    7.742   11.222
 qs_ot_get_p                        119 10.4    0.001    0.001    7.995    8.311
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.732    7.879
 qs_ks_build_kohn_sham_matrix       119  9.3    0.010    0.012    7.731    7.878
 mp_sum_l                          7287 12.8    5.385    7.179    5.385    7.179
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.804    6.940
 multiply_cannon_sync_h2d         54864 15.5    5.202    6.339    5.202    6.339
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    5.763    6.204
 dbcsr_mm_accdrv_process          76910 16.1    1.844    2.891    4.002    5.687
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    5.272    5.395
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    5.266    5.314
 init_scf_run                        11  5.9    0.000    0.001    4.759    4.759
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    4.759    4.759
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    4.477    4.477
 sum_up_and_integrate               119 10.3    0.001    0.002    4.348    4.354
 integrate_v_rspace                 119 11.3    0.002    0.003    4.337    4.344
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    4.291    4.292
 cp_fm_redistribute_end              50 14.0    2.189    4.268    2.195    4.271
 qs_rho_update_rho_low              119  7.7    0.000    0.001    4.058    4.220
 calculate_rho_elec                 119  8.7    0.011    0.017    4.058    4.220
 cp_fm_diag_elpa_base                50 14.0    2.071    4.171    2.075    4.179
 calculate_dm_sparse                119  9.5    0.000    0.000    2.987    3.141
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.912    3.099
 apply_single                       119 13.6    0.000    0.000    2.911    3.099
 multiply_cannon_metrocomm1       54864 15.5    0.056    0.061    1.670    2.808
 acc_transpose_blocks             54864 15.5    0.219    0.242    2.237    2.776
 jit_kernel_multiply                 13 15.8    2.097    2.732    2.097    2.732
 ot_diis_step                       108 11.5    0.006    0.006    2.665    2.666
 calculate_first_density_matrix       1  7.0    0.002    0.009    2.582    2.589
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.415    2.479
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.258    2.260
 density_rs2pw                      119  9.7    0.004    0.004    2.053    2.155
 grid_integrate_task_list           119 12.3    2.010    2.106    2.010    2.106
 wfi_extrapolate                     11  7.9    0.001    0.003    2.106    2.106
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.999    2.066
 init_scf_loop                       11  6.9    0.000    0.000    2.043    2.043
 mp_sum_d                          4137 12.0    1.281    1.887    1.281    1.887
 potential_pw2rs                    119 12.3    0.004    0.004    1.744    1.753
 pw_transfer                       1439 11.6    0.051    0.058    1.578    1.643
 make_m2s                          4572 13.5    0.053    0.056    1.547    1.598
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.007    1.502    1.569
 make_images                       4572 14.5    0.132    0.138    1.466    1.517
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.483    1.501
 acc_transpose_blocks_sync       164592 16.5    1.196    1.434    1.196    1.434
 mp_alltoall_d11v                  2130 13.8    1.287    1.434    1.287    1.434
 transfer_rs2pw                     487 10.6    0.005    0.006    1.317    1.413
 grid_collocate_task_list           119  9.7    1.350    1.410    1.350    1.410
 mp_waitany                       12084 13.8    1.253    1.388    1.253    1.388
 transfer_pw2rs                     487 13.2    0.006    0.007    1.284    1.292
 fft3d_ps                          1201 14.6    0.369    0.473    1.215    1.277
 fft_wrap_pw1pw2_140                487 13.2    0.108    0.114    1.167    1.238
 dbcsr_dot_sd                      1205 11.9    0.049    0.060    0.703    1.098
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=53.355000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=424.090909, yerr=1.239835
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             481.177600E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10306                    303.
 MP_Sync                54
 MP_Alltoall          2060                1032117.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.031   38.624   38.626
 qs_mol_dyn_low                       1  2.0    0.020    0.072   38.235   38.244
 qs_forces                           11  3.9    0.011    0.016   38.154   38.172
 qs_energies                         11  4.9    0.002    0.002   36.482   36.502
 scf_env_do_scf                      11  5.9    0.000    0.001   31.441   31.442
 scf_env_do_scf_inner_loop          108  6.5    0.004    0.010   29.023   29.023
 dbcsr_multiply_generic            2286 12.5    0.101    0.104   21.878   22.221
 qs_scf_new_mos                     108  7.5    0.001    0.001   20.471   20.700
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   20.471   20.700
 ot_scf_mini                        108  9.5    0.002    0.003   19.553   19.719
 multiply_cannon                   2286 13.5    0.207    0.216   16.788   18.239
 velocity_verlet                     10  3.0    0.003    0.010   18.174   18.176
 multiply_cannon_loop              2286 14.5    1.192    1.258   15.658   17.166
 ot_mini                            108 10.5    0.001    0.001   12.065   12.299
 mp_waitall_1                    200699 16.5    5.433   10.686    5.433   10.686
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.716    9.883
 multiply_cannon_metrocomm3       27432 15.5    0.072    0.074    3.975    9.450
 multiply_cannon_multrec          27432 15.5    1.827    4.083    6.149    8.847
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.748    6.885
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.748    6.884
 dbcsr_mm_accdrv_process          47894 16.0    3.577    6.075    4.243    6.333
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.936    6.060
 qs_ot_get_p                        119 10.4    0.001    0.001    4.595    4.817
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.623    4.541
 mp_sum_l                          7287 12.8    2.082    4.084    2.082    4.084
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.063    4.053
 apply_single                       119 13.6    0.000    0.000    3.062    4.053
 init_scf_run                        11  5.9    0.000    0.001    3.789    3.790
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.789    3.789
 sum_up_and_integrate               119 10.3    0.001    0.001    3.673    3.679
 integrate_v_rspace                 119 11.3    0.002    0.003    3.659    3.665
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.575    3.601
 calculate_rho_elec                 119  8.7    0.021    0.024    3.575    3.600
 qs_ot_p2m_diag                      50 11.0    0.009    0.013    2.957    2.977
 make_m2s                          4572 13.5    0.051    0.053    2.564    2.892
 make_images                       4572 14.5    0.204    0.242    2.475    2.806
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.559    2.559
 init_scf_loop                       11  6.9    0.000    0.000    2.398    2.398
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.305    2.392
 ot_diis_step                       108 11.5    0.011    0.011    2.303    2.303
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.281    2.284
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.247    2.247
 cp_fm_redistribute_end              50 14.0    1.139    2.223    1.142    2.225
 multiply_cannon_sync_h2d         27432 15.5    1.717    2.201    1.717    2.201
 cp_fm_diag_elpa_base                50 14.0    1.052    2.139    1.080    2.173
 calculate_dm_sparse                119  9.5    0.000    0.001    2.054    2.133
 acc_transpose_blocks             27432 15.5    0.112    0.118    1.610    1.953
 density_rs2pw                      119  9.7    0.004    0.004    1.877    1.948
 grid_integrate_task_list           119 12.3    1.839    1.934    1.839    1.934
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.858    1.860
 pw_transfer                       1439 11.6    0.065    0.068    1.823    1.851
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.731    1.763
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.667    1.706
 jit_kernel_multiply                  9 16.5    0.610    1.686    0.610    1.686
 make_images_data                  4572 15.5    0.047    0.054    1.222    1.566
 prepare_preconditioner              11  7.9    0.000    0.000    1.480    1.507
 make_preconditioner                 11  8.9    0.000    0.000    1.480    1.507
 wfi_extrapolate                     11  7.9    0.001    0.001    1.450    1.450
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.387    1.443
 hybrid_alltoall_any               4725 16.4    0.053    0.115    1.086    1.430
 potential_pw2rs                    119 12.3    0.006    0.006    1.418    1.422
 fft_wrap_pw1pw2_140                487 13.2    0.127    0.132    1.383    1.416
 fft3d_ps                          1201 14.6    0.521    0.576    1.362    1.391
 grid_collocate_task_list           119  9.7    1.287    1.354    1.287    1.354
 mp_alltoall_d11v                  2130 13.8    1.182    1.349    1.182    1.349
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.241    1.250
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.194    1.242
 transfer_rs2pw                     487 10.6    0.005    0.005    1.081    1.147
 mp_sum_d                          4137 12.0    0.547    1.053    0.547    1.053
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    0.994    1.012
 acc_transpose_blocks_sync        82296 16.5    0.816    0.944    0.816    0.944
 mp_allgather_i34                  2286 14.5    0.557    0.943    0.557    0.943
 transfer_pw2rs                     487 13.2    0.004    0.005    0.891    0.898
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.869    0.882
 acc_transpose_blocks_kernels     27432 16.5    0.190    0.278    0.657    0.864
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=38.626000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=457.636364, yerr=1.871933
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             512.188416E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63490.
 MP_Allreduce        10155                    305.
 MP_Sync                54
 MP_Alltoall          1821                1607811.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.053    0.075   32.007   32.008
 qs_mol_dyn_low                       1  2.0    0.003    0.003   31.634   31.644
 qs_forces                           11  3.9    0.002    0.003   31.371   31.371
 qs_energies                         11  4.9    0.002    0.002   29.839   29.840
 scf_env_do_scf                      11  5.9    0.000    0.001   25.269   25.269
 scf_env_do_scf_inner_loop          108  6.5    0.005    0.009   22.819   22.819
 dbcsr_multiply_generic            2286 12.5    0.096    0.099   16.782   16.842
 qs_scf_new_mos                     108  7.5    0.001    0.001   15.251   15.268
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   15.250   15.267
 velocity_verlet                     10  3.0    0.002    0.002   15.180   15.188
 ot_scf_mini                        108  9.5    0.002    0.002   14.492   14.509
 multiply_cannon                   2286 13.5    0.193    0.195   13.582   14.372
 multiply_cannon_loop              2286 14.5    0.862    0.929   12.842   13.645
 ot_mini                            108 10.5    0.001    0.001    8.993    9.010
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.516    7.534
 multiply_cannon_multrec          18288 15.5    1.872    2.862    6.972    7.330
 rebuild_ks_matrix                  119  8.3    0.000    0.000    5.943    5.960
 qs_ks_build_kohn_sham_matrix       119  9.3    0.018    0.021    5.942    5.959
 dbcsr_mm_accdrv_process          38222 16.0    4.916    5.756    5.007    5.819
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.237    5.252
 mp_waitall_1                    158411 16.6    2.547    3.717    2.547    3.717
 sum_up_and_integrate               119 10.3    0.001    0.001    3.490    3.494
 integrate_v_rspace                 119 11.3    0.003    0.003    3.478    3.483
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.797    3.416
 init_scf_run                        11  5.9    0.000    0.001    3.413    3.413
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.413    3.413
 qs_ot_get_p                        119 10.4    0.001    0.001    3.286    3.313
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.208    3.213
 calculate_rho_elec                 119  8.7    0.031    0.031    3.207    3.213
 multiply_cannon_metrocomm3       18288 15.5    0.049    0.050    1.466    2.466
 init_scf_loop                       11  6.9    0.000    0.000    2.434    2.434
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.018    2.341
 apply_single                       119 13.6    0.000    0.000    2.018    2.341
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.207    2.208
 qs_ot_p2m_diag                      50 11.0    0.012    0.013    2.181    2.187
 make_m2s                          4572 13.5    0.044    0.045    1.771    1.900
 grid_integrate_task_list           119 12.3    1.807    1.900    1.807    1.900
 density_rs2pw                      119  9.7    0.004    0.004    1.784    1.891
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.886    1.887
 calculate_dm_sparse                119  9.5    0.000    0.001    1.847    1.855
 pw_transfer                       1439 11.6    0.065    0.069    1.825    1.833
 make_images                       4572 14.5    0.190    0.202    1.685    1.813
 acc_transpose_blocks             18288 15.5    0.077    0.079    1.622    1.762
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.732    1.742
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.693    1.704
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.650    1.660
 cp_fm_diag_elpa_base                50 14.0    1.627    1.641    1.648    1.659
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.648    1.650
 prepare_preconditioner              11  7.9    0.000    0.000    1.628    1.635
 make_preconditioner                 11  8.9    0.000    0.000    1.628    1.635
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.490    1.569
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.499    1.506
 ot_diis_step                       108 11.5    0.011    0.011    1.464    1.464
 fft_wrap_pw1pw2_140                487 13.2    0.179    0.183    1.406    1.414
 mp_sum_l                          7287 12.8    1.049    1.375    1.049    1.375
 grid_collocate_task_list           119  9.7    1.232    1.327    1.232    1.327
 potential_pw2rs                    119 12.3    0.007    0.008    1.301    1.303
 fft3d_ps                          1201 14.6    0.546    0.564    1.286    1.296
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.184    1.187
 multiply_cannon_sync_h2d         18288 15.5    1.034    1.185    1.034    1.185
 wfi_extrapolate                     11  7.9    0.001    0.001    1.154    1.154
 transfer_rs2pw                     487 10.6    0.005    0.005    0.940    1.061
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.945    0.963
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    0.936    0.937
 make_images_data                  4572 15.5    0.047    0.051    0.770    0.928
 acc_transpose_blocks_sync        54864 16.5    0.740    0.874    0.740    0.874
 hybrid_alltoall_any               4725 16.4    0.058    0.114    0.671    0.860
 mp_alltoall_d11v                  2130 13.8    0.637    0.811    0.637    0.811
 acc_transpose_blocks_kernels     18288 16.5    0.219    0.225    0.787    0.795
 transfer_pw2rs                     487 13.2    0.004    0.004    0.784    0.786
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.775    0.776
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.672    0.739
 cp_fm_cholesky_invert               11 10.9    0.687    0.690    0.687    0.690
 mp_alltoall_z22v                  1201 16.6    0.594    0.660    0.594    0.660
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=32.008000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=487.909091, yerr=0.668043
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             548.712448E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63489.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.037   35.882   35.883
 qs_mol_dyn_low                       1  2.0    0.003    0.003   35.667   35.675
 qs_forces                           11  3.9    0.002    0.003   35.610   35.611
 qs_energies                         11  4.9    0.002    0.002   33.910   33.915
 scf_env_do_scf                      11  5.9    0.000    0.001   28.970   28.972
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   25.651   25.651
 dbcsr_multiply_generic            2286 12.5    0.107    0.110   19.730   19.837
 velocity_verlet                     10  3.0    0.001    0.002   18.249   18.251
 qs_scf_new_mos                     108  7.5    0.001    0.001   17.647   17.703
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   17.646   17.703
 ot_scf_mini                        108  9.5    0.002    0.003   16.619   16.674
 multiply_cannon                   2286 13.5    0.218    0.227   16.094   16.412
 multiply_cannon_loop              2286 14.5    1.534    1.612   15.162   15.505
 ot_mini                            108 10.5    0.001    0.001   10.259   10.322
 multiply_cannon_multrec          27432 15.5    2.452    3.107    9.024    9.325
 qs_ot_get_derivative               108 11.5    0.001    0.001    8.394    8.451
 dbcsr_mm_accdrv_process          47916 15.9    6.079    7.770    6.469    7.831
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.267    6.311
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.267    6.311
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.550    5.589
 qs_ot_get_p                        119 10.4    0.001    0.001    3.526    3.603
 init_scf_run                        11  5.9    0.000    0.001    3.596    3.597
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    3.596    3.596
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.116    3.521
 sum_up_and_integrate               119 10.3    0.001    0.001    3.439    3.448
 integrate_v_rspace                 119 11.3    0.003    0.003    3.427    3.437
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.332    3.369
 calculate_rho_elec                 119  8.7    0.040    0.046    3.331    3.369
 init_scf_loop                       11  6.9    0.000    0.000    3.300    3.301
 acc_transpose_blocks             27432 15.5    0.117    0.120    2.398    2.504
 prepare_preconditioner              11  7.9    0.000    0.000    2.479    2.487
 make_preconditioner                 11  8.9    0.000    0.000    2.479    2.487
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.091    2.416
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.324    2.352
 make_m2s                          4572 13.5    0.054    0.056    2.239    2.345
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.120    2.243
 apply_single                       119 13.6    0.000    0.000    2.120    2.243
 make_images                       4572 14.5    0.272    0.334    2.130    2.234
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.227    2.229
 mp_waitall_1                    137007 16.6    1.701    2.213    1.701    2.213
 calculate_dm_sparse                119  9.5    0.000    0.000    2.125    2.180
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    2.137    2.147
 pw_transfer                       1439 11.6    0.065    0.069    1.960    1.996
 grid_integrate_task_list           119 12.3    1.831    1.922    1.831    1.922
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.867    1.906
 density_rs2pw                      119  9.7    0.004    0.004    1.774    1.864
 ot_diis_step                       108 11.5    0.012    0.012    1.826    1.826
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.823    1.824
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.736    1.736
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.650    1.663
 fft_wrap_pw1pw2_140                487 13.2    0.227    0.235    1.557    1.598
 acc_transpose_blocks_sync        82296 16.5    1.433    1.531    1.433    1.531
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.504    1.513
 cp_fm_diag_elpa_base                50 14.0    1.471    1.486    1.502    1.512
 fft3d_ps                          1201 14.6    0.590    0.651    1.356    1.379
 grid_collocate_task_list           119  9.7    1.248    1.329    1.248    1.329
 wfi_extrapolate                     11  7.9    0.001    0.001    1.318    1.318
 multiply_cannon_metrocomm3       27432 15.5    0.040    0.042    0.766    1.310
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.281    1.301
 mp_sum_l                          7287 12.8    0.963    1.268    0.963    1.268
 potential_pw2rs                    119 12.3    0.008    0.009    1.252    1.254
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.225    1.233
 cp_fm_upper_to_full                 72 14.2    0.814    1.157    0.814    1.157
 qs_energies_init_hamiltonians       11  5.9    0.018    0.021    1.087    1.088
 jit_kernel_multiply                  6 16.5    0.325    1.082    0.325    1.082
 dbcsr_complete_redistribute        329 12.2    0.120    0.145    0.750    1.031
 transfer_rs2pw                     487 10.6    0.004    0.005    0.865    0.958
 make_images_data                  4572 15.5    0.048    0.053    0.834    0.949
 hybrid_alltoall_any               4725 16.4    0.065    0.155    0.713    0.896
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.801    0.878
 mp_alltoall_d11v                  2130 13.8    0.724    0.871    0.724    0.871
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.561    0.832
 acc_transpose_blocks_kernels     27432 16.5    0.272    0.281    0.819    0.829
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.804    0.808
 cp_fm_cholesky_invert               11 10.9    0.722    0.725    0.722    0.725
 mp_alltoall_i22                    627 13.8    0.423    0.723    0.423    0.723
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=35.883000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=521.636364, yerr=2.496278
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             607.068160E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63488.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.021    0.062   28.423   28.424
 qs_mol_dyn_low                       1  2.0    0.003    0.003   28.172   28.180
 qs_forces                           11  3.9    0.002    0.002   28.116   28.117
 qs_energies                         11  4.9    0.002    0.002   26.422   26.424
 scf_env_do_scf                      11  5.9    0.000    0.001   21.697   21.697
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   19.232   19.232
 velocity_verlet                     10  3.0    0.002    0.002   14.510   14.512
 dbcsr_multiply_generic            2286 12.5    0.096    0.099   12.942   13.015
 qs_scf_new_mos                     108  7.5    0.001    0.001   11.585   11.609
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   11.585   11.608
 ot_scf_mini                        108  9.5    0.002    0.002   10.884   10.911
 multiply_cannon                   2286 13.5    0.224    0.228   10.425   10.779
 multiply_cannon_loop              2286 14.5    0.641    0.656    9.545    9.721
 ot_mini                            108 10.5    0.001    0.001    6.271    6.303
 multiply_cannon_multrec           9144 15.5    1.703    1.978    5.976    6.237
 rebuild_ks_matrix                  119  8.3    0.000    0.000    5.804    5.827
 qs_ks_build_kohn_sham_matrix       119  9.3    0.016    0.018    5.804    5.826
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.171    5.192
 qs_ot_get_derivative               108 11.5    0.001    0.001    4.992    5.018
 dbcsr_mm_accdrv_process          12550 15.8    3.571    4.191    4.164    4.247
 sum_up_and_integrate               119 10.3    0.001    0.001    3.410    3.416
 integrate_v_rspace                 119 11.3    0.003    0.003    3.400    3.405
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.338    3.346
 calculate_rho_elec                 119  8.7    0.060    0.061    3.338    3.345
 init_scf_run                        11  5.9    0.000    0.001    3.249    3.249
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.248    3.248
 qs_ot_get_p                        119 10.4    0.001    0.001    2.758    2.786
 init_scf_loop                       11  6.9    0.000    0.000    2.446    2.447
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.123    2.124
 pw_transfer                       1439 11.6    0.066    0.068    2.109    2.118
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.015    2.028
 grid_integrate_task_list           119 12.3    1.865    1.923    1.865    1.923
 density_rs2pw                      119  9.7    0.003    0.003    1.759    1.857
 make_m2s                          4572 13.5    0.033    0.034    1.717    1.842
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    1.787    1.790
 calculate_dm_sparse                119  9.5    0.000    0.000    1.745    1.763
 mp_waitall_1                    115863 16.7    1.313    1.762    1.313    1.762
 make_images                       4572 14.5    0.268    0.299    1.628    1.751
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.680    1.703
 fft_wrap_pw1pw2_140                487 13.2    0.325    0.335    1.681    1.693
 prepare_preconditioner              11  7.9    0.000    0.000    1.682    1.687
 make_preconditioner                 11  8.9    0.000    0.000    1.682    1.687
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.571    1.599
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.556    1.557
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.548    1.549
 acc_transpose_blocks              9144 15.5    0.042    0.043    1.443    1.478
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.393    1.400
 fft3d_ps                          1201 14.6    0.651    0.662    1.366    1.376
 grid_collocate_task_list           119  9.7    1.297    1.372    1.297    1.372
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.327    1.339
 jit_kernel_multiply                  8 15.8    0.554    1.337    0.554    1.337
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.275    1.283
 cp_fm_diag_elpa_base                50 14.0    1.248    1.265    1.274    1.282
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    1.265    1.266
 ot_diis_step                       108 11.5    0.013    0.013    1.266    1.266
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.216    1.248
 apply_single                       119 13.6    0.000    0.000    1.216    1.248
 potential_pw2rs                    119 12.3    0.010    0.011    1.224    1.226
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.141    1.146
 wfi_extrapolate                     11  7.9    0.001    0.001    1.074    1.074
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.875    0.925
 hybrid_alltoall_any               4725 16.4    0.065    0.177    0.742    0.915
 transfer_rs2pw                     487 10.6    0.004    0.005    0.782    0.872
 make_images_data                  4572 15.5    0.042    0.046    0.729    0.869
 cp_fm_cholesky_invert               11 10.9    0.838    0.841    0.838    0.841
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.810    0.816
 mp_alltoall_d11v                  2130 13.8    0.733    0.802    0.733    0.802
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.021    0.372    0.762
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.753    0.754
 acc_transpose_blocks_sync        27432 16.5    0.719    0.743    0.719    0.743
 qs_env_update_s_mstruct             11  6.9    0.001    0.002    0.685    0.725
 acc_transpose_blocks_kernels      9144 16.5    0.118    0.119    0.666    0.687
 transfer_pw2rs                     487 13.2    0.003    0.004    0.626    0.627
 mp_alltoall_z22v                  1201 16.6    0.589    0.622    0.589    0.622
 mp_allgather_i34                  2286 14.5    0.197    0.586    0.197    0.586
 jit_kernel_transpose                 5 15.6    0.548    0.570    0.548    0.570
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=28.424000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=573.090909, yerr=7.391082
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             753.246208E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63723.
 MP_Allreduce        10154                    429.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.030   42.459   42.460
 qs_mol_dyn_low                       1  2.0    0.003    0.003   42.262   42.269
 qs_forces                           11  3.9    0.003    0.006   42.200   42.200
 qs_energies                         11  4.9    0.002    0.002   40.194   40.198
 scf_env_do_scf                      11  5.9    0.001    0.001   34.412   34.413
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   26.602   26.604
 velocity_verlet                     10  3.0    0.002    0.002   23.971   23.977
 dbcsr_multiply_generic            2286 12.5    0.104    0.107   18.718   18.956
 qs_scf_new_mos                     108  7.5    0.001    0.001   16.969   17.060
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   16.968   17.059
 ot_scf_mini                        108  9.5    0.002    0.002   15.813   15.911
 multiply_cannon                   2286 13.5    0.299    0.304   14.965   15.890
 multiply_cannon_loop              2286 14.5    0.867    0.891   13.731   14.686
 ot_mini                            108 10.5    0.001    0.001    9.627    9.740
 multiply_cannon_multrec           9144 15.5    3.338    4.599    8.789    8.881
 init_scf_loop                       11  6.9    0.000    0.000    7.785    7.788
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.596    7.694
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.245    7.383
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    7.245    7.383
 prepare_preconditioner              11  7.9    0.000    0.000    6.827    6.839
 make_preconditioner                 11  8.9    0.000    0.000    6.826    6.839
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.316    6.719
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.522    6.648
 dbcsr_mm_accdrv_process          12550 15.8    4.739    6.512    5.318    6.566
 cp_fm_upper_to_full                 72 14.2    3.210    4.666    3.210    4.666
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.163    4.169
 calculate_rho_elec                 119  8.7    0.117    0.120    4.163    4.169
 sum_up_and_integrate               119 10.3    0.001    0.001    3.847    3.853
 integrate_v_rspace                 119 11.3    0.004    0.004    3.837    3.843
 init_scf_run                        11  5.9    0.000    0.001    3.735    3.735
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.735    3.735
 qs_ot_get_p                        119 10.4    0.001    0.001    3.282    3.419
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.780    3.176
 mp_waitall_1                     94719 16.7    2.119    3.138    2.119    3.138
 pw_transfer                       1439 11.6    0.069    0.070    2.973    2.977
 fft_wrap_pw1pw2                   1201 12.6    0.009    0.009    2.873    2.877
 dbcsr_complete_redistribute        329 12.2    0.283    0.288    2.016    2.848
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.729    2.553
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.173    2.489
 apply_single                       119 13.6    0.000    0.000    2.173    2.488
 fft_wrap_pw1pw2_140                487 13.2    0.651    0.654    2.461    2.467
 make_m2s                          4572 13.5    0.037    0.037    2.241    2.384
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.505    2.324
 mp_alltoall_i22                    627 13.8    1.478    2.323    1.478    2.323
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.285    2.289
 calculate_dm_sparse                119  9.5    0.000    0.000    2.268    2.284
 make_images                       4572 14.5    0.353    0.387    2.122    2.264
 density_rs2pw                      119  9.7    0.003    0.004    2.197    2.220
 multiply_cannon_metrocomm3        9144 15.5    0.021    0.021    1.269    2.193
 grid_integrate_task_list           119 12.3    2.081    2.147    2.081    2.147
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.038    2.088
 ot_diis_step                       108 11.5    0.014    0.014    2.005    2.005
 qs_ot_p2m_diag                      50 11.0    0.043    0.043    1.931    1.933
 acc_transpose_blocks              9144 15.5    0.044    0.045    1.831    1.879
 fft3d_ps                          1201 14.6    0.866    0.885    1.793    1.797
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.777    1.777
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    1.769    1.770
 mp_sum_l                          7287 12.8    0.962    1.750    0.962    1.750
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.578    1.578
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.534    1.578
 grid_collocate_task_list           119  9.7    1.522    1.562    1.522    1.562
 wfi_extrapolate                     11  7.9    0.001    0.001    1.385    1.385
 cp_fm_cholesky_invert               11 10.9    1.379    1.382    1.379    1.382
 potential_pw2rs                    119 12.3    0.014    0.014    1.335    1.336
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.301    1.321
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.303    1.303
 cp_fm_diag_elpa_base                50 14.0    1.158    1.211    1.301    1.301
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.275    1.283
 hybrid_alltoall_any               4725 16.4    0.091    0.151    1.016    1.211
 mp_alltoall_d11v                  2130 13.8    1.149    1.175    1.149    1.175
 acc_transpose_blocks_sync        27432 16.5    1.111    1.152    1.111    1.152
 qs_env_update_s_mstruct             11  6.9    0.002    0.002    1.098    1.114
 make_images_data                  4572 15.5    0.046    0.050    0.932    1.103
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    0.995    1.025
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.931    0.944
 qs_create_task_list                 11  7.9    0.001    0.001    0.927    0.938
 generate_qs_task_list               11  8.9    0.369    0.388    0.926    0.938
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=42.460000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=704.181818, yerr=13.749831
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             493.506560E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66212.
 MP_Allreduce         9776                    488.
 MP_Sync                52
 MP_Alltoall          1938                1383689.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.032   80.114   80.116
 qs_mol_dyn_low                       1  2.0    0.005    0.017   79.812   79.822
 qs_forces                           11  3.9    0.003    0.003   79.730   79.732
 qs_energies                         11  4.9    0.003    0.010   76.891   76.903
 scf_env_do_scf                      11  5.9    0.000    0.001   68.091   68.094
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.010   62.510   62.511
 dbcsr_multiply_generic            2055 12.4    0.104    0.106   49.687   50.163
 qs_scf_new_mos                      99  7.5    0.000    0.001   45.662   45.795
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   45.661   45.794
 ot_scf_mini                         99  9.5    0.002    0.003   43.348   43.480
 multiply_cannon                   2055 13.4    0.181    0.185   42.033   43.211
 multiply_cannon_loop              2055 14.4    1.773    1.816   41.074   42.317
 velocity_verlet                     10  3.0    0.002    0.007   41.938   41.939
 ot_mini                             99 10.5    0.002    0.009   25.575   25.701
 qs_ot_get_derivative                99 11.5    0.001    0.001   18.772   18.898
 multiply_cannon_multrec          49320 15.4   11.583   12.220   17.476   18.232
 rebuild_ks_matrix                  110  8.3    0.001    0.001   14.432   14.627
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.019   14.432   14.627
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.653   12.831
 mp_waitall_1                    220248 16.4    9.969   11.075    9.969   11.075
 multiply_cannon_sync_h2d         49320 15.4    9.840   10.388    9.840   10.388
 qs_ot_get_p                        110 10.4    0.001    0.002    9.520    9.628
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.229    7.703
 apply_single                       110 13.6    0.000    0.000    7.228    7.702
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    7.088    7.642
 multiply_cannon_metrocomm3       49320 15.4    0.083    0.087    6.072    7.544
 sum_up_and_integrate               110 10.3    0.002    0.003    6.951    6.966
 integrate_v_rspace                 110 11.3    0.003    0.003    6.925    6.948
 init_scf_run                        11  5.9    0.000    0.001    6.710    6.711
 scf_env_initial_rho_setup           11  6.9    0.001    0.003    6.710    6.710
 ot_diis_step                        99 11.5    0.005    0.006    6.625    6.625
 qs_ot_p2m_diag                      48 11.0    0.013    0.019    6.345    6.371
 qs_rho_update_rho_low              110  7.6    0.001    0.002    6.173    6.288
 calculate_rho_elec                 110  8.6    0.021    0.025    6.173    6.287
 dbcsr_mm_accdrv_process          87628 16.1    2.953    3.083    5.761    6.083
 cp_dbcsr_syevd                      48 12.0    0.002    0.003    5.563    5.563
 init_scf_loop                       11  6.9    0.007    0.058    5.554    5.559
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    5.163    5.231
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    5.000    5.022
 cp_fm_diag_elpa_base                48 14.0    4.988    5.010    4.998    5.021
 mp_sum_l                          6594 12.7    3.384    4.163    3.384    4.163
 wfi_extrapolate                     11  7.9    0.002    0.008    4.039    4.039
 make_m2s                          4110 13.4    0.060    0.064    3.825    3.958
 make_images                       4110 14.4    0.177    0.190    3.731    3.867
 calculate_dm_sparse                110  9.5    0.001    0.001    3.675    3.778
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.519    3.523
 multiply_cannon_metrocomm1       49320 15.4    0.065    0.068    2.105    3.464
 grid_integrate_task_list           110 12.3    3.263    3.426    3.263    3.426
 density_rs2pw                      110  9.6    0.004    0.004    3.264    3.407
 prepare_preconditioner              11  7.9    0.000    0.000    3.248    3.266
 make_preconditioner                 11  8.9    0.000    0.001    3.248    3.266
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.186    3.237
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    3.046    3.107
 pw_transfer                       1331 11.6    0.055    0.068    3.017    3.099
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.033    3.080
 fft_wrap_pw1pw2                   1111 12.6    0.007    0.008    2.929    3.010
 fft_wrap_pw1pw2_140                451 13.1    0.309    0.332    2.517    2.608
 calculate_first_density_matrix       1  7.0    0.001    0.004    2.571    2.576
 acc_transpose_blocks             49320 15.4    0.199    0.207    2.485    2.571
 potential_pw2rs                    110 12.3    0.005    0.006    2.549    2.565
 jit_kernel_multiply                 13 15.9    2.534    2.556    2.534    2.556
 mp_alltoall_d11v                  2046 13.8    1.997    2.429    1.997    2.429
 fft3d_ps                          1111 14.6    0.797    0.887    2.223    2.284
 grid_collocate_task_list           110  9.6    2.155    2.263    2.155    2.263
 mp_waitany                       14300 13.8    1.825    2.094    1.825    2.094
 transfer_rs2pw                     451 10.6    0.005    0.006    1.851    1.989
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.911    1.932
 make_images_data                  4110 15.4    0.043    0.048    1.734    1.857
 mp_sum_d                          3891 11.9    1.332    1.855    1.332    1.855
 hybrid_alltoall_any               4261 16.3    0.084    0.484    1.507    1.809
 cp_fm_cholesky_invert               11 10.9    1.751    1.755    1.751    1.755
 transfer_pw2rs                     451 13.1    0.006    0.007    1.726    1.732
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.637    1.664
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=80.116000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=468.090909, yerr=2.502891
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             583.684096E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66430.
 MP_Allreduce         9775                    566.
 MP_Sync                52
 MP_Alltoall          1717                1714660.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.026   68.248   68.249
 qs_mol_dyn_low                       1  2.0    0.004    0.008   68.039   68.050
 qs_forces                           11  3.9    0.003    0.003   67.947   67.949
 qs_energies                         11  4.9    0.002    0.002   64.635   64.640
 scf_env_do_scf                      11  5.9    0.000    0.001   56.110   56.114
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   48.701   48.702
 dbcsr_multiply_generic            2055 12.4    0.115    0.119   37.780   38.013
 velocity_verlet                     10  3.0    0.002    0.006   35.733   35.735
 qs_scf_new_mos                      99  7.5    0.001    0.001   32.939   33.062
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   32.939   33.061
 multiply_cannon                   2055 13.4    0.224    0.242   31.302   32.399
 ot_scf_mini                         99  9.5    0.003    0.003   31.272   31.400
 multiply_cannon_loop              2055 14.4    1.164    1.183   30.095   30.805
 ot_mini                             99 10.5    0.001    0.001   18.259   18.401
 multiply_cannon_multrec          24660 15.4    6.982    8.511   14.072   15.527
 rebuild_ks_matrix                  110  8.3    0.000    0.000   13.469   13.568
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   13.469   13.568
 qs_ot_get_derivative                99 11.5    0.001    0.001   12.501   12.632
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.846   11.943
 mp_waitall_1                    176588 16.5    7.431    9.827    7.431    9.827
 multiply_cannon_sync_h2d         24660 15.4    6.438    8.015    6.438    8.015
 multiply_cannon_metrocomm3       24660 15.4    0.074    0.075    5.096    7.934
 init_scf_loop                       11  6.9    0.000    0.000    7.375    7.376
 dbcsr_mm_accdrv_process          52282 16.1    5.572    6.465    6.920    7.215
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.437    7.065
 apply_single                       110 13.6    0.000    0.001    6.437    7.065
 qs_ot_get_p                        110 10.4    0.001    0.001    6.372    6.537
 sum_up_and_integrate               110 10.3    0.001    0.003    6.322    6.332
 integrate_v_rspace                 110 11.3    0.003    0.003    6.295    6.306
 init_scf_run                        11  5.9    0.000    0.001    6.137    6.138
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    6.137    6.137
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.839    5.851
 calculate_rho_elec                 110  8.6    0.039    0.047    5.839    5.851
 ot_diis_step                        99 11.5    0.010    0.010    5.712    5.712
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    4.697    5.413
 prepare_preconditioner              11  7.9    0.000    0.000    5.379    5.399
 make_preconditioner                 11  8.9    0.000    0.000    5.379    5.399
 make_full_inverse_cholesky          11  9.9    0.000    0.000    4.981    5.134
 make_m2s                          4110 13.4    0.056    0.059    4.133    4.543
 qs_ot_p2m_diag                      48 11.0    0.029    0.044    4.503    4.522
 make_images                       4110 14.4    0.404    0.460    4.022    4.431
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    4.040    4.041
 pw_transfer                       1331 11.6    0.066    0.072    3.508    3.641
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.009    3.401    3.537
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.487    3.500
 cp_fm_diag_elpa_base                48 14.0    3.441    3.452    3.484    3.497
 wfi_extrapolate                     11  7.9    0.001    0.001    3.477    3.477
 density_rs2pw                      110  9.6    0.004    0.004    3.093    3.383
 grid_integrate_task_list           110 12.3    3.168    3.360    3.168    3.360
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.257    3.328
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.213    3.215
 fft_wrap_pw1pw2_140                451 13.1    0.359    0.379    2.924    3.060
 calculate_dm_sparse                110  9.5    0.001    0.001    3.009    3.033
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.882    2.927
 make_images_data                  4110 15.4    0.048    0.053    2.251    2.691
 hybrid_alltoall_any               4261 16.3    0.105    0.454    1.934    2.683
 fft3d_ps                          1111 14.6    1.115    1.349    2.492    2.648
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.564    2.567
 cp_fm_cholesky_invert               11 10.9    2.489    2.495    2.489    2.495
 mp_sum_l                          6594 12.7    1.710    2.450    1.710    2.450
 grid_collocate_task_list           110  9.6    2.168    2.293    2.168    2.293
 potential_pw2rs                    110 12.3    0.008    0.009    2.209    2.221
 mp_alltoall_d11v                  2046 13.8    1.672    2.034    1.672    2.034
 acc_transpose_blocks             24660 15.4    0.113    0.118    1.966    1.989
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.951    1.968
 jit_kernel_multiply                 10 16.4    0.984    1.931    0.984    1.931
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    1.888    1.889
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.777    1.790
 transfer_rs2pw                     451 10.6    0.006    0.007    1.442    1.724
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.575    1.587
 multiply_cannon_metrocomm4       22605 15.4    0.074    0.078    0.769    1.549
 mp_allgather_i34                  2055 14.4    0.517    1.520    0.517    1.520
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.389    1.493
 mp_waitany                       10164 13.8    1.214    1.411    1.214    1.411
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=68.249000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=552.545455, yerr=6.959149
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             657.338368E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66421.
 MP_Allreduce         9774                    562.
 MP_Sync                52
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.028   59.190   59.191
 qs_mol_dyn_low                       1  2.0    0.003    0.003   58.945   58.955
 qs_forces                           11  3.9    0.003    0.003   58.880   58.880
 qs_energies                         11  4.9    0.001    0.002   55.692   55.695
 scf_env_do_scf                      11  5.9    0.000    0.001   47.772   47.773
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   39.418   39.419
 velocity_verlet                     10  3.0    0.001    0.002   32.134   32.135
 dbcsr_multiply_generic            2055 12.4    0.110    0.114   28.512   28.756
 qs_scf_new_mos                      99  7.5    0.001    0.001   24.893   24.984
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   24.892   24.983
 ot_scf_mini                         99  9.5    0.002    0.002   23.648   23.766
 multiply_cannon                   2055 13.4    0.212    0.222   22.416   23.647
 multiply_cannon_loop              2055 14.4    0.812    0.843   21.280   22.427
 ot_mini                             99 10.5    0.001    0.001   13.562   13.680
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.069   12.207
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   12.068   12.206
 multiply_cannon_multrec          16440 15.4    3.738    4.653    9.935   10.809
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.622   10.746
 mp_waitall_1                    139946 16.5    6.728    9.831    6.728    9.831
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.175    9.289
 init_scf_loop                       11  6.9    0.000    0.000    8.322    8.322
 multiply_cannon_metrocomm3       16440 15.4    0.046    0.048    4.232    7.204
 prepare_preconditioner              11  7.9    0.000    0.000    6.590    6.611
 make_preconditioner                 11  8.9    0.000    0.000    6.590    6.611
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.944    6.288
 sum_up_and_integrate               110 10.3    0.001    0.002    6.218    6.235
 integrate_v_rspace                 110 11.3    0.003    0.003    6.192    6.210
 dbcsr_mm_accdrv_process          34862 16.1    5.331    5.710    6.042    6.151
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.679    5.688
 calculate_rho_elec                 110  8.6    0.059    0.059    5.678    5.688
 init_scf_run                        11  5.9    0.000    0.001    5.546    5.546
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.545    5.546
 qs_ot_get_p                        110 10.4    0.001    0.001    5.276    5.414
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.843    5.327
 apply_single                       110 13.6    0.000    0.000    4.843    5.327
 make_m2s                          4110 13.4    0.049    0.051    4.110    4.474
 make_images                       4110 14.4    0.399    0.524    3.993    4.359
 ot_diis_step                        99 11.5    0.011    0.011    4.359    4.359
 multiply_cannon_sync_h2d         16440 15.4    3.272    4.029    3.272    4.029
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.188    3.762
 qs_ot_p2m_diag                      48 11.0    0.042    0.044    3.679    3.683
 pw_transfer                       1331 11.6    0.066    0.073    3.563    3.574
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.455    3.470
 grid_integrate_task_list           110 12.3    3.201    3.397    3.201    3.397
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.313    3.313
 density_rs2pw                      110  9.6    0.004    0.004    2.936    3.133
 fft_wrap_pw1pw2_140                451 13.1    0.461    0.477    3.007    3.025
 wfi_extrapolate                     11  7.9    0.001    0.001    2.924    2.924
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.790    2.798
 cp_fm_diag_elpa_base                48 14.0    2.725    2.754    2.789    2.796
 hybrid_alltoall_any               4261 16.3    0.108    0.380    2.088    2.789
 make_images_data                  4110 15.4    0.046    0.051    2.320    2.758
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.738    2.740
 cp_fm_cholesky_invert               11 10.9    2.574    2.580    2.574    2.580
 calculate_dm_sparse                110  9.5    0.001    0.001    2.512    2.540
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.531    2.533
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.410    2.470
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.435    2.469
 fft3d_ps                          1111 14.6    1.122    1.140    2.370    2.381
 multiply_cannon_metrocomm4       14385 15.4    0.047    0.050    0.864    2.378
 grid_collocate_task_list           110  9.6    2.212    2.369    2.212    2.369
 mp_irecv_dv                      48980 15.7    0.789    2.240    0.789    2.240
 potential_pw2rs                    110 12.3    0.011    0.011    2.077    2.085
 mp_sum_l                          6594 12.7    1.359    2.050    1.359    2.050
 mp_alltoall_d11v                  2046 13.8    1.731    1.987    1.731    1.987
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    1.948    1.948
 dbcsr_complete_redistribute        325 12.2    0.323    0.355    1.375    1.824
 cp_fm_upper_to_full                 70 14.2    1.367    1.728    1.367    1.728
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.637    1.654
 acc_transpose_blocks             16440 15.4    0.077    0.079    1.571    1.648
 cp_fm_cholesky_decompose            22 10.9    1.535    1.552    1.535    1.552
 mp_allgather_i34                  2055 14.4    0.452    1.506    0.452    1.506
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.377    1.506
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.451    1.462
 transfer_rs2pw                     451 10.6    0.005    0.006    1.236    1.426
 mp_waitany                       17072 13.8    1.124    1.372    1.124    1.372
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    0.927    1.367
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.265    1.273
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=59.191000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=624.090909, yerr=7.500964
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             723.566592E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66419.
 MP_Allreduce         9774                    603.
 MP_Sync                52
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.009    0.027   65.160   65.161
 qs_mol_dyn_low                       1  2.0    0.003    0.003   64.947   64.958
 qs_forces                           11  3.9    0.003    0.003   64.748   64.749
 qs_energies                         11  4.9    0.002    0.002   61.358   61.361
 scf_env_do_scf                      11  5.9    0.000    0.001   53.206   53.209
 scf_env_do_scf_inner_loop           99  6.5    0.014    0.023   41.661   41.661
 velocity_verlet                     10  3.0    0.003    0.003   37.043   37.050
 dbcsr_multiply_generic            2055 12.4    0.117    0.123   30.398   30.633
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.155   27.252
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   27.155   27.251
 ot_scf_mini                         99  9.5    0.002    0.003   25.475   25.565
 multiply_cannon                   2055 13.4    0.236    0.253   23.483   24.373
 multiply_cannon_loop              2055 14.4    1.406    1.469   22.126   22.810
 ot_mini                             99 10.5    0.001    0.001   14.703   14.798
 multiply_cannon_multrec          24660 15.4    4.100    6.749   13.065   14.131
 rebuild_ks_matrix                  110  8.3    0.000    0.000   11.870   11.982
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   11.869   11.981
 init_scf_loop                       11  6.9    0.000    0.000   11.507   11.508
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.544   10.629
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.480   10.582
 prepare_preconditioner              11  7.9    0.000    0.000    9.768    9.783
 make_preconditioner                 11  8.9    0.000    0.000    9.768    9.783
 dbcsr_mm_accdrv_process          52304 16.0    7.845    9.427    8.807    9.762
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.015    9.477
 sum_up_and_integrate               110 10.3    0.001    0.002    6.172    6.186
 integrate_v_rspace                 110 11.3    0.003    0.003    6.146    6.159
 qs_ot_get_p                        110 10.4    0.001    0.001    5.724    5.873
 mp_waitall_1                    121746 16.5    4.065    5.818    4.065    5.818
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.793    5.804
 calculate_rho_elec                 110  8.6    0.078    0.081    5.793    5.804
 make_m2s                          4110 13.4    0.059    0.061    5.310    5.642
 make_images                       4110 14.4    0.574    0.694    5.168    5.497
 init_scf_run                        11  5.9    0.000    0.001    5.432    5.432
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.432    5.432
 cp_fm_upper_to_full                 70 14.2    3.379    4.882    3.379    4.882
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.037    4.129
 apply_single                       110 13.6    0.000    0.000    4.037    4.129
 ot_diis_step                        99 11.5    0.011    0.011    4.122    4.122
 qs_ot_p2m_diag                      48 11.0    0.055    0.064    3.866    3.884
 pw_transfer                       1331 11.6    0.066    0.073    3.727    3.763
 dbcsr_complete_redistribute        325 12.2    0.416    0.460    2.603    3.751
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.620    3.659
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.499    3.545
 grid_integrate_task_list           110 12.3    3.265    3.430    3.265    3.430
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.337    3.384
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.300    3.301
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    2.089    3.225
 fft_wrap_pw1pw2_140                451 13.1    0.531    0.548    3.130    3.173
 density_rs2pw                      110  9.6    0.004    0.004    2.932    3.114
 calculate_dm_sparse                110  9.5    0.001    0.001    2.989    3.026
 multiply_cannon_metrocomm3       24660 15.4    0.038    0.039    1.343    3.016
 hybrid_alltoall_any               4261 16.3    0.122    0.463    2.312    2.988
 wfi_extrapolate                     11  7.9    0.001    0.001    2.962    2.962
 make_images_data                  4110 15.4    0.049    0.052    2.611    2.950
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.744    2.867
 mp_alltoall_i22                    605 13.7    1.644    2.801    1.644    2.801
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.779    2.781
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.768    2.780
 cp_fm_diag_elpa_base                48 14.0    2.620    2.680    2.766    2.779
 cp_fm_cholesky_invert               11 10.9    2.584    2.593    2.584    2.593
 multiply_cannon_sync_h2d         24660 15.4    2.354    2.523    2.354    2.523
 acc_transpose_blocks             24660 15.4    0.109    0.113    2.411    2.517
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.426    2.466
 fft3d_ps                          1111 14.6    1.153    1.198    2.445    2.461
 grid_collocate_task_list           110  9.6    2.264    2.397    2.264    2.397
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.376    2.379
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    2.234    2.234
 potential_pw2rs                    110 12.3    0.013    0.013    2.015    2.019
 mp_alltoall_d11v                  2046 13.8    1.725    1.937    1.725    1.937
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.784    1.824
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.617    1.722
 cp_fm_cholesky_decompose            22 10.9    1.619    1.664    1.619    1.664
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.627    1.637
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.549    1.565
 multiply_cannon_metrocomm4       20550 15.4    0.059    0.062    0.860    1.522
 mp_sum_l                          6594 12.7    0.912    1.507    0.912    1.507
 acc_transpose_blocks_sync        73980 16.4    1.390    1.490    1.390    1.490
 mp_irecv_dv                      62702 16.1    0.754    1.440    0.754    1.440
 transfer_rs2pw                     451 10.6    0.005    0.006    1.149    1.378
 mp_waitany                       13376 13.8    1.022    1.317    1.022    1.317
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=65.161000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=686.363636, yerr=9.167856
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             855.597056E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66417.
 MP_Allreduce         9774                    644.
 MP_Sync                52
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.009    0.028   54.761   54.761
 qs_mol_dyn_low                       1  2.0    0.003    0.003   54.538   54.547
 qs_forces                           11  3.9    0.003    0.003   54.473   54.473
 qs_energies                         11  4.9    0.001    0.002   50.827   50.831
 scf_env_do_scf                      11  5.9    0.000    0.001   42.671   42.671
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   35.058   35.059
 velocity_verlet                     10  3.0    0.002    0.002   30.902   30.905
 dbcsr_multiply_generic            2055 12.4    0.107    0.109   23.228   23.416
 qs_scf_new_mos                      99  7.5    0.001    0.001   20.522   20.557
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   20.521   20.556
 ot_scf_mini                         99  9.5    0.002    0.002   19.271   19.295
 multiply_cannon                   2055 13.4    0.237    0.250   17.647   19.143
 multiply_cannon_loop              2055 14.4    0.603    0.625   16.387   16.811
 rebuild_ks_matrix                  110  8.3    0.000    0.000   11.622   11.652
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   11.622   11.652
 ot_mini                             99 10.5    0.001    0.001   10.838   10.855
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.335   10.362
 multiply_cannon_multrec           8220 15.4    3.168    4.406    7.642    8.563
 init_scf_loop                       11  6.9    0.000    0.000    7.568    7.570
 mp_waitall_1                    103326 16.6    5.766    7.456    5.766    7.456
 qs_ot_get_derivative                99 11.5    0.001    0.001    7.153    7.177
 sum_up_and_integrate               110 10.3    0.001    0.002    6.185    6.199
 integrate_v_rspace                 110 11.3    0.003    0.003    6.158    6.172
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.966    5.982
 calculate_rho_elec                 110  8.6    0.113    0.113    5.966    5.981
 prepare_preconditioner              11  7.9    0.000    0.000    5.930    5.937
 make_preconditioner                 11  8.9    0.000    0.000    5.930    5.937
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.498    5.574
 dbcsr_mm_accdrv_process          17442 15.9    3.091    4.194    4.334    5.239
 init_scf_run                        11  5.9    0.000    0.001    5.040    5.040
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.039    5.040
 qs_ot_get_p                        110 10.4    0.001    0.001    4.500    4.516
 make_m2s                          4110 13.4    0.037    0.039    4.156    4.450
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.019    3.034    4.445
 make_images                       4110 14.4    0.644    0.705    4.027    4.321
 pw_transfer                       1331 11.6    0.066    0.071    4.048    4.056
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.941    3.952
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.811    3.860
 apply_single                       110 13.6    0.000    0.000    3.811    3.859
 ot_diis_step                        99 11.5    0.012    0.015    3.663    3.663
 grid_integrate_task_list           110 12.3    3.383    3.526    3.383    3.526
 fft_wrap_pw1pw2_140                451 13.1    0.721    0.732    3.430    3.444
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.172    3.175
 density_rs2pw                      110  9.6    0.004    0.004    2.989    3.110
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    2.836    2.837
 cp_fm_cholesky_invert               11 10.9    2.831    2.835    2.831    2.835
 hybrid_alltoall_any               4261 16.3    0.199    0.856    2.227    2.745
 wfi_extrapolate                     11  7.9    0.001    0.001    2.695    2.695
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    2.667    2.667
 make_images_data                  4110 15.4    0.040    0.046    2.237    2.611
 grid_collocate_task_list           110  9.6    2.364    2.555    2.364    2.555
 calculate_dm_sparse                110  9.5    0.001    0.001    2.481    2.516
 multiply_cannon_sync_h2d          8220 15.4    2.385    2.508    2.385    2.508
 fft3d_ps                          1111 14.6    1.290    1.297    2.481    2.499
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.494    2.495
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.341    2.348
 cp_fm_diag_elpa_base                48 14.0    2.284    2.305    2.339    2.346
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.250    2.251
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.065    2.188
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.139    2.155
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.782    2.004
 potential_pw2rs                    110 12.3    0.016    0.016    1.991    1.995
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    1.883    1.895
 mp_alltoall_d11v                  2046 13.8    1.625    1.797    1.625    1.797
 cp_fm_cholesky_decompose            22 10.9    1.634    1.652    1.634    1.652
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.614    1.616
 qs_env_update_s_mstruct             11  6.9    0.002    0.006    1.491    1.607
 mp_allgather_i34                  2055 14.4    0.433    1.519    0.433    1.519
 dbcsr_complete_redistribute        325 12.2    0.554    0.574    1.407    1.508
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.432    1.444
 jit_kernel_multiply                  8 15.7    0.928    1.421    0.928    1.421
 acc_transpose_blocks              8220 15.4    0.040    0.041    1.358    1.395
 qs_create_task_list                 11  7.9    0.001    0.001    1.204    1.303
 generate_qs_task_list               11  8.9    0.374    0.442    1.203    1.302
 transfer_rs2pw                     451 10.6    0.005    0.005    1.093    1.241
 mp_waitany                        9240 13.8    1.045    1.196    1.045    1.196
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.135    1.162
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=54.761000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=805.454545, yerr=12.830104
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.364308E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67098.
 MP_Allreduce         9752                    812.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.031   89.640   89.641
 qs_mol_dyn_low                       1  2.0    0.003    0.003   89.344   89.355
 qs_forces                           11  3.9    0.003    0.003   89.278   89.278
 qs_energies                         11  4.9    0.004    0.004   85.096   85.098
 scf_env_do_scf                      11  5.9    0.000    0.001   75.003   75.003
 velocity_verlet                     10  3.0    0.002    0.002   57.118   57.124
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   46.160   46.162
 dbcsr_multiply_generic            2055 12.4    0.122    0.127   30.184   30.327
 init_scf_loop                       11  6.9    0.000    0.000   28.770   28.773
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.946   28.036
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   27.945   28.035
 prepare_preconditioner              11  7.9    0.000    0.000   26.707   26.720
 make_preconditioner                 11  8.9    0.000    0.000   26.707   26.720
 ot_scf_mini                         99  9.5    0.002    0.002   26.136   26.218
 make_full_inverse_cholesky          11  9.9    0.000    0.000   21.077   26.176
 multiply_cannon                   2055 13.4    0.333    0.357   22.736   23.384
 multiply_cannon_loop              2055 14.4    0.822    0.831   20.887   21.301
 cp_fm_upper_to_full                 70 14.2   12.735   18.085   12.735   18.085
 ot_mini                             99 10.5    0.001    0.001   14.700   14.777
 rebuild_ks_matrix                  110  8.3    0.000    0.001   14.185   14.276
 qs_ks_build_kohn_sham_matrix       110  9.3    0.014    0.014   14.184   14.276
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.853   12.936
 dbcsr_complete_redistribute        325 12.2    1.010    1.027    7.231   10.290
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.897    9.977
 multiply_cannon_multrec           8220 15.4    4.085    4.216    9.773    9.959
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.202    9.267
 mp_waitall_1                     84994 16.7    7.889    8.992    7.889    8.992
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    5.614    8.650
 mp_alltoall_i22                    605 13.7    5.254    8.331    5.254    8.331
 qs_rho_update_rho_low              110  7.6    0.000    0.001    7.605    7.645
 calculate_rho_elec                 110  8.6    0.222    0.223    7.605    7.644
 sum_up_and_integrate               110 10.3    0.002    0.002    7.044    7.062
 integrate_v_rspace                 110 11.3    0.004    0.004    7.016    7.034
 qs_ot_get_p                        110 10.4    0.001    0.001    6.062    6.158
 make_m2s                          4110 13.4    0.043    0.043    5.505    6.029
 pw_transfer                       1331 11.6    0.075    0.075    5.953    5.964
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.010    5.836    5.847
 make_images                       4110 14.4    0.877    0.934    5.315    5.838
 init_scf_run                        11  5.9    0.000    0.001    5.809    5.809
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.809    5.809
 dbcsr_mm_accdrv_process          11614 15.7    3.860    4.125    5.539    5.729
 cp_fm_cholesky_invert               11 10.9    5.703    5.709    5.703    5.709
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.907    5.414
 apply_single                       110 13.6    0.000    0.000    4.907    5.414
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.019    5.078    5.404
 fft_wrap_pw1pw2_140                451 13.1    1.366    1.368    5.099    5.109
 ot_diis_step                        99 11.5    0.015    0.016    4.781    4.781
 qs_ot_p2m_diag                      48 11.0    0.151    0.156    4.378    4.385
 density_rs2pw                      110  9.6    0.004    0.004    4.038    4.054
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.850    3.850
 grid_integrate_task_list           110 12.3    3.687    3.756    3.687    3.756
 hybrid_alltoall_any               4261 16.3    0.263    0.561    2.933    3.687
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    3.668    3.668
 make_images_data                  4110 15.4    0.045    0.048    2.926    3.625
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.097    3.570
 fft3d_ps                          1111 14.6    1.884    1.895    3.541    3.553
 wfi_extrapolate                     11  7.9    0.001    0.001    3.432    3.432
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.223    3.223
 cp_fm_diag_elpa_base                48 14.0    2.687    2.881    3.222    3.222
 calculate_dm_sparse                110  9.5    0.001    0.001    3.187    3.202
 multiply_cannon_sync_h2d          8220 15.4    3.134    3.162    3.134    3.162
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.933    2.936
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.749    2.794
 grid_collocate_task_list           110  9.6    2.675    2.685    2.675    2.685
 potential_pw2rs                    110 12.3    0.021    0.022    2.537    2.543
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.397    2.426
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.253    2.257
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    2.192    2.241
 mp_alltoall_d11v                  2046 13.8    2.166    2.228    2.166    2.228
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.096    2.191
 cp_fm_cholesky_decompose            22 10.9    2.094    2.121    2.094    2.121
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.930    1.939
 qs_create_task_list                 11  7.9    0.001    0.001    1.882    1.926
 generate_qs_task_list               11  8.9    0.733    0.786    1.882    1.925
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.815    1.863
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=89.641000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1230.363636, yerr=58.487408
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022121472       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444702699520       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796573E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.086553E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499744       0.0%      0.0%    100.0%
 number of processed stacks              11851392       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.8
 marketing flops                   143.508480E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank             625.242112E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10258080
 MPI messages size (bytes):
  total size                         4.456715E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.459031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3542056              94711185408
     32768 < size <=   131072             1282176              73356279808
    131072 < size <=  4194304             5107038            3151762421624
   4194304 < size <= 16777216              259842            1136842803272
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4007                  57688.
 MP_Allreduce        11097                    796.
 MP_Sync                86
 MP_Alltoall          2210                2272107.
 MP_SendRecv         24130                  18752.
 MP_ISendRecv        24130                  18752.
 MP_Wait             42150
 MP_ISend            15900                 108037.
 MP_IRecv            15900                 108037.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.021    0.045  205.256  205.259
 qs_mol_dyn_low                       1  2.0    0.007    0.012  204.500  204.565
 qs_forces                           11  3.9    0.005    0.006  204.350  204.354
 qs_energies                         11  4.9    0.004    0.005  198.763  198.782
 scf_env_do_scf                      11  5.9    0.001    0.001  182.369  182.373
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  162.085  162.087
 dbcsr_multiply_generic            2485 12.5    0.177    0.182  125.429  126.691
 qs_scf_new_mos                     116  7.6    0.001    0.001  123.377  123.607
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001  123.376  123.606
 velocity_verlet                     10  3.0    0.005    0.009  121.906  121.909
 ot_scf_mini                        116  9.6    0.003    0.004  116.719  116.980
 multiply_cannon                   2485 13.5    0.232    0.241  101.264  104.016
 multiply_cannon_loop              2485 14.5    2.381    2.432   99.186  101.932
 ot_mini                            116 10.6    0.001    0.002   66.547   66.809
 multiply_cannon_multrec          59640 15.5   31.662   33.662   41.371   43.551
 qs_ot_get_derivative               116 11.6    0.001    0.002   41.716   42.000
 rebuild_ks_matrix                  127  8.3    0.001    0.001   33.125   33.621
 qs_ks_build_kohn_sham_matrix       127  9.3    0.015    0.023   33.124   33.620
 mp_waitall_1                    264810 16.5   28.960   31.689   28.960   31.689
 qs_ks_update_qs_env                127  7.6    0.001    0.001   29.699   30.105
 multiply_cannon_sync_h2d         59640 15.5   26.161   28.885   26.161   28.885
 qs_ot_get_p                        127 10.4    0.001    0.001   28.117   28.512
 apply_preconditioner_dbcsr         127 12.6    0.000    0.001   24.264   25.696
 apply_single                       127 13.6    0.001    0.001   24.263   25.696
 ot_diis_step                       116 11.6    0.008    0.012   24.563   24.564
 qs_ot_p2m_diag                      82 11.4    0.079    0.092   21.613   21.682
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002   20.175   20.351
 init_scf_loop                       11  6.9    0.000    0.001   20.211   20.212
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   18.813   18.814
 multiply_cannon_metrocomm3       59640 15.5    0.114    0.119   16.281   18.687
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   15.943   15.980
 cp_fm_diag_elpa_base                82 14.4   15.878   15.909   15.939   15.974
 prepare_preconditioner              11  7.9    0.000    0.000   15.619   15.668
 make_preconditioner                 11  8.9    0.000    0.000   15.619   15.668
 make_full_inverse_cholesky          11  9.9    0.000    0.000   14.839   15.015
 make_m2s                          4970 13.5    0.103    0.112   13.607   14.001
 make_images                       4970 14.5    0.394    0.413   13.424   13.823
 sum_up_and_integrate               127 10.3    0.002    0.004   13.714   13.729
 integrate_v_rspace                 127 11.3    0.003    0.004   13.656   13.676
 qs_rho_update_rho_low              127  7.7    0.001    0.002   12.829   12.927
 calculate_rho_elec                 127  8.7    0.045    0.063   12.828   12.926
 init_scf_run                        11  5.9    0.000    0.001   12.229   12.230
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   12.229   12.230
 mp_sum_l                          7884 12.9    9.327   11.140    9.327   11.140
 dbcsr_mm_accdrv_process         123452 16.2    4.693    4.839    9.280    9.809
 wfi_extrapolate                     11  7.9    0.001    0.005    9.015    9.015
 cp_fm_cholesky_invert               11 10.9    8.865    8.873    8.865    8.873
 calculate_dm_sparse                127  9.5    0.001    0.001    8.464    8.606
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    8.159    8.317
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    8.025    8.104
 multiply_cannon_metrocomm1       59640 15.5    0.091    0.094    6.378    7.962
 make_images_data                  4970 15.5    0.066    0.072    6.666    7.589
 pw_transfer                       1535 11.6    0.073    0.086    7.420    7.589
 grid_integrate_task_list           127 12.3    6.990    7.407    6.990    7.407
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.012    7.221    7.386
 hybrid_alltoall_any               5155 16.4    0.290    2.235    5.851    7.364
 density_rs2pw                      127  9.7    0.006    0.006    6.538    7.205
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.653    6.665
 fft_wrap_pw1pw2_140                519 13.2    0.852    0.892    6.384    6.553
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.761    5.869
 fft3d_ps                          1281 14.7    2.176    2.812    5.242    5.555
 mp_alltoall_d11v                  2401 14.1    4.257    5.428    4.257    5.428
 grid_collocate_task_list           127  9.7    4.798    5.223    4.798    5.223
 cp_fm_cholesky_decompose            22 10.9    4.566    4.581    4.566    4.581
 potential_pw2rs                    127 12.3    0.009    0.010    4.308    4.320
 mp_sum_d                          4456 12.1    3.480    4.247    3.480    4.247
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=205.259000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=589.727273, yerr=6.209830
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022121472       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444702699520       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796573E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.166472E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499744       0.0%      0.0%    100.0%
 number of processed stacks               5925696       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1131.6
 marketing flops                   143.508480E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank             834.007040E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2385600
 MPI messages size (bytes):
  total size                         4.069300E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.705776E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               70188               2295595008
     32768 < size <=   131072              716032              54973693952
    131072 < size <=  4194304             1363760            1386318135296
   4194304 < size <= 16777216              153648            1453842923456
  16777216 < size                           67056            1171888537600
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4003                  58199.
 MP_Allreduce        11086                    960.
 MP_Sync                86
 MP_Alltoall          1955                5671270.
 MP_SendRecv         11938                  47072.
 MP_ISendRecv        11938                  47072.
 MP_Wait             25718
 MP_ISend            11660                 212488.
 MP_IRecv            11660                 212488.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.062    0.118  188.143  188.146
 qs_mol_dyn_low                       1  2.0    0.007    0.010  187.530  187.544
 qs_forces                           11  3.9    0.004    0.005  187.428  187.433
 qs_energies                         11  4.9    0.003    0.006  180.662  180.675
 scf_env_do_scf                      11  5.9    0.001    0.003  164.288  164.298
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.009  131.619  131.622
 velocity_verlet                     10  3.0    0.006    0.012  117.754  117.756
 dbcsr_multiply_generic            2485 12.5    0.187    0.193   96.609   98.004
 qs_scf_new_mos                     116  7.6    0.001    0.001   93.449   93.998
 qs_scf_loop_do_ot                  116  8.6    0.001    0.002   93.448   93.998
 ot_scf_mini                        116  9.6    0.004    0.005   88.676   89.205
 multiply_cannon                   2485 13.5    0.499    0.551   76.632   81.255
 multiply_cannon_loop              2485 14.5    1.549    1.630   73.312   75.986
 ot_mini                            116 10.6    0.001    0.001   49.921   50.489
 mp_waitall_1                    212858 16.6   23.736   39.237   23.736   39.237
 multiply_cannon_multrec          29820 15.5   20.850   25.137   31.370   35.892
 rebuild_ks_matrix                  127  8.3    0.001    0.001   32.054   32.751
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.024   32.053   32.750
 init_scf_loop                       11  6.9    0.002    0.010   32.580   32.581
 qs_ks_update_qs_env                127  7.6    0.001    0.001   28.804   29.436
 multiply_cannon_metrocomm3       29820 15.5    0.098    0.104   15.432   29.064
 qs_ot_get_derivative               116 11.6    0.001    0.002   28.108   28.625
 prepare_preconditioner              11  7.9    0.000    0.000   28.258   28.322
 make_preconditioner                 11  8.9    0.001    0.003   28.258   28.322
 make_full_inverse_cholesky          11  9.9    0.000    0.001   26.922   27.460
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   21.915   23.171
 apply_single                       127 13.6    0.001    0.001   21.914   23.171
 ot_diis_step                       116 11.6    0.014    0.015   21.639   21.641
 qs_ot_get_p                        127 10.4    0.001    0.001   20.977   21.637
 multiply_cannon_sync_h2d         29820 15.5   17.954   21.188   17.954   21.188
 cp_fm_cholesky_invert               11 10.9   16.489   16.502   16.489   16.502
 qs_ot_p2m_diag                      82 11.4    0.185    0.213   16.073   16.107
 make_m2s                          4970 13.5    0.085    0.091   14.057   15.682
 make_images                       4970 14.5    1.147    1.340   13.843   15.468
 cp_dbcsr_syevd                      82 12.4    0.010    0.040   14.885   14.886
 sum_up_and_integrate               127 10.3    0.002    0.004   13.909   13.936
 integrate_v_rspace                 127 11.3    0.003    0.004   13.849   13.878
 qs_rho_update_rho_low              127  7.7    0.001    0.001   13.256   13.288
 calculate_rho_elec                 127  8.7    0.086    0.104   13.255   13.287
 cp_fm_diag_elpa                     82 13.4    0.001    0.002   11.694   11.723
 cp_fm_diag_elpa_base                82 14.4   11.437   11.528   11.684   11.709
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002   11.099   11.493
 init_scf_run                        11  5.9    0.000    0.001   11.458   11.459
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   11.457   11.459
 multiply_cannon_metrocomm4       27335 15.5    0.098    0.114    3.808   11.018
 mp_irecv_dv                      68888 16.3    3.606   10.622    3.606   10.622
 dbcsr_mm_accdrv_process          61726 16.2    5.410    6.035    9.972   10.497
 make_images_data                  4970 15.5    0.065    0.075    8.260   10.201
 hybrid_alltoall_any               5155 16.4    0.343    1.479    6.959    9.613
 pw_transfer                       1535 11.6    0.084    0.094    8.509    8.561
 wfi_extrapolate                     11  7.9    0.001    0.001    8.352    8.352
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    8.288    8.342
 grid_integrate_task_list           127 12.3    7.146    7.503    7.146    7.503
 fft_wrap_pw1pw2_140                519 13.2    0.925    0.953    7.337    7.400
 density_rs2pw                      127  9.7    0.006    0.006    7.007    7.351
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    6.264    7.039
 cp_fm_cholesky_decompose            22 10.9    6.895    6.984    6.895    6.984
 calculate_dm_sparse                127  9.5    0.001    0.001    6.422    6.572
 mp_sum_l                          7884 12.9    4.167    6.219    4.167    6.219
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.084    6.092
 fft3d_ps                          1281 14.7    2.804    2.981    5.865    5.924
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.001    5.197    5.412
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    5.320    5.397
 grid_collocate_task_list           127  9.7    4.971    5.278    4.971    5.278
 mp_allgather_i34                  2485 14.5    1.864    4.956    1.864    4.956
 mp_alltoall_d11v                  2401 14.1    4.126    4.744    4.126    4.744
 potential_pw2rs                    127 12.3    0.015    0.018    4.452    4.466
 mp_sum_d                          4454 12.1    2.687    4.060    2.687    4.060
 dbcsr_complete_redistribute        393 12.7    0.765    0.850    3.112    3.938
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=188.146000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=795.454545, yerr=1.724879
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.928533E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks               3984192       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1695.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             957.014016E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1042912
 MPI messages size (bytes):
  total size                         2.716210E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.604448E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              281856              36943429632
    131072 < size <=  4194304              660064             996105256960
   4194304 < size <= 16777216               65632             931531265168
  16777216 < size                           28672             751619276800
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4002                  58208.
 MP_Allreduce        11082                    999.
 MP_Sync                87
 MP_Alltoall          1712                9388896.
 MP_SendRecv          7936                  75008.
 MP_ISendRecv         7936                  75008.
 MP_Wait             21820
 MP_ISend            11748                 275205.
 MP_IRecv            11748                 275205.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.010    0.028  172.882  172.883
 qs_mol_dyn_low                       1  2.0    0.004    0.011  172.505  172.519
 qs_forces                           11  3.9    0.004    0.005  172.410  172.412
 qs_energies                         11  4.9    0.002    0.002  165.878  165.888
 scf_env_do_scf                      11  5.9    0.001    0.001  150.527  150.528
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  116.042  116.042
 velocity_verlet                     10  3.0    0.003    0.011  111.092  111.094
 dbcsr_multiply_generic            2507 12.6    0.186    0.190   81.533   82.659
 qs_scf_new_mos                     117  7.6    0.001    0.001   79.285   79.618
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   79.284   79.617
 ot_scf_mini                        117  9.6    0.003    0.004   75.060   75.458
 multiply_cannon                   2507 13.6    0.507    0.528   62.128   67.342
 multiply_cannon_loop              2507 14.6    1.126    1.187   59.216   61.802
 ot_mini                            117 10.6    0.001    0.001   42.445   42.862
 mp_waitall_1                    170520 16.6   24.350   34.552   24.350   34.552
 init_scf_loop                       11  6.9    0.000    0.000   34.390   34.390
 rebuild_ks_matrix                  128  8.3    0.001    0.001   29.969   30.488
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.017   29.968   30.487
 prepare_preconditioner              11  7.9    0.000    0.000   30.435   30.475
 make_preconditioner                 11  8.9    0.000    0.000   30.435   30.475
 make_full_inverse_cholesky          11  9.9    0.000    0.000   28.092   29.456
 qs_ks_update_qs_env                128  7.6    0.001    0.001   27.006   27.472
 multiply_cannon_multrec          20056 15.6   13.190   15.958   22.517   25.337
 multiply_cannon_metrocomm3       20056 15.6    0.061    0.066   15.222   24.806
 qs_ot_get_derivative               117 11.6    0.001    0.002   22.954   23.359
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   19.521   20.572
 apply_single                       128 13.6    0.001    0.001   19.521   20.572
 ot_diis_step                       117 11.6    0.018    0.018   19.377   19.378
 qs_ot_get_p                        128 10.4    0.001    0.001   18.041   18.537
 make_m2s                          5014 13.6    0.078    0.084   14.375   15.622
 make_images                       5014 14.6    1.137    1.219   14.139   15.382
 multiply_cannon_sync_h2d         20056 15.6   13.655   15.101   13.655   15.101
 cp_fm_cholesky_invert               11 10.9   14.173   14.182   14.173   14.182
 sum_up_and_integrate               128 10.3    0.002    0.003   13.812   13.834
 qs_ot_p2m_diag                      83 11.4    0.265    0.272   13.769   13.779
 integrate_v_rspace                 128 11.3    0.003    0.004   13.753   13.773
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.422   13.465
 calculate_rho_elec                 128  8.7    0.129    0.143   13.422   13.464
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   12.721   12.722
 init_scf_run                        11  5.9    0.000    0.001   10.345   10.346
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.345   10.345
 make_images_data                  5014 15.6    0.064    0.074    8.584   10.231
 hybrid_alltoall_any               5200 16.5    0.445    2.023    7.400    9.890
 cp_fm_diag_elpa                     83 13.4    0.000    0.000    9.592    9.615
 cp_fm_diag_elpa_base                83 14.4    9.188    9.344    9.589    9.612
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.042    9.316
 multiply_cannon_metrocomm4       17549 15.6    0.065    0.073    3.422    9.193
 dbcsr_mm_accdrv_process          41502 16.2    5.626    5.935    8.783    8.952
 mp_irecv_dv                      50230 16.2    3.293    8.935    3.293    8.935
 pw_transfer                       1547 11.6    0.084    0.101    8.682    8.785
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.010    8.459    8.566
 grid_integrate_task_list           128 12.3    7.296    7.704    7.296    7.704
 fft_wrap_pw1pw2_140                523 13.2    1.055    1.093    7.546    7.658
 wfi_extrapolate                     11  7.9    0.001    0.001    7.350    7.350
 cp_fm_cholesky_decompose            22 10.9    7.206    7.275    7.206    7.275
 density_rs2pw                      128  9.7    0.005    0.006    6.785    7.226
 cp_fm_upper_to_full                105 14.8    5.644    7.112    5.644    7.112
 dbcsr_complete_redistribute        395 12.7    1.164    1.192    4.504    6.203
 calculate_dm_sparse                128  9.5    0.001    0.001    5.847    5.959
 fft3d_ps                          1291 14.7    2.814    3.048    5.779    5.868
 grid_collocate_task_list           128  9.7    5.163    5.641    5.163    5.641
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.371    5.376
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.671    5.336
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.372    5.066
 mp_alltoall_d11v                  2415 14.1    4.306    5.050    4.306    5.050
 mp_sum_l                          7950 12.9    3.189    4.808    3.189    4.808
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.498    4.630
 mp_allgather_i34                  2507 14.6    1.425    4.472    1.425    4.472
 potential_pw2rs                    128 12.3    0.020    0.022    4.240    4.249
 transfer_fm_to_dbcsr                11  9.9    0.018    0.025    2.323    3.997
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    3.903    3.935
 mp_alltoall_i22                    716 14.1    1.913    3.802    1.913    3.802
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.729    3.730
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.459    3.513
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=172.883000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=909.272727, yerr=4.350359
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.353788E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               5977344       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               1.143235E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1143192
 MPI messages size (bytes):
  total size                         2.023815E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.770319E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              319024              36042702848
    131072 < size <=  4194304              715736             785529176064
   4194304 < size <= 16777216               70320             665379241840
  16777216 < size                           30720             536870912000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  58348.
 MP_Allreduce        11057                   1083.
 MP_Sync                87
 MP_Alltoall          1712               12503084.
 MP_SendRecv          5888                  75008.
 MP_ISendRecv         5888                  75008.
 MP_Wait             22442
 MP_ISend            14952                 244818.
 MP_IRecv            14952                 244818.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.021    0.030  187.843  187.844
 qs_mol_dyn_low                       1  2.0    0.004    0.011  187.486  187.500
 qs_forces                           11  3.9    0.004    0.004  187.356  187.363
 qs_energies                         11  4.9    0.002    0.003  180.222  180.232
 scf_env_do_scf                      11  5.9    0.001    0.001  163.510  163.526
 velocity_verlet                     10  3.0    0.003    0.015  124.325  124.329
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.009  117.539  117.541
 qs_scf_new_mos                     117  7.6    0.001    0.001   81.090   81.380
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   81.090   81.379
 dbcsr_multiply_generic            2507 12.6    0.193    0.200   80.471   81.148
 ot_scf_mini                        117  9.6    0.003    0.003   76.546   76.880
 multiply_cannon                   2507 13.6    0.553    0.587   55.904   58.930
 multiply_cannon_loop              2507 14.6    1.821    1.915   52.098   54.000
 init_scf_loop                       11  6.9    0.000    0.000   45.849   45.850
 ot_mini                            117 10.6    0.001    0.001   42.964   43.282
 prepare_preconditioner              11  7.9    0.000    0.000   41.781   41.820
 make_preconditioner                 11  8.9    0.000    0.000   41.781   41.820
 make_full_inverse_cholesky          11  9.9    0.010    0.021   35.560   40.512
 multiply_cannon_multrec          30084 15.6   13.600   18.955   26.519   31.432
 rebuild_ks_matrix                  128  8.3    0.001    0.001   29.374   29.679
 qs_ks_build_kohn_sham_matrix       128  9.3    0.016    0.018   29.373   29.678
 qs_ks_update_qs_env                128  7.6    0.001    0.001   26.488   26.757
 mp_waitall_1                    147882 16.7   17.060   26.159   17.060   26.159
 qs_ot_get_derivative               117 11.6    0.001    0.002   23.431   23.768
 make_m2s                          5014 13.6    0.092    0.098   20.137   21.307
 make_images                       5014 14.6    1.922    2.254   19.827   20.995
 qs_ot_get_p                        128 10.4    0.001    0.001   19.795   20.150
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   18.992   19.461
 apply_single                       128 13.6    0.001    0.001   18.992   19.461
 ot_diis_step                       117 11.6    0.017    0.018   19.414   19.416
 cp_fm_upper_to_full                105 14.8   11.527   16.907   11.527   16.907
 cp_fm_cholesky_invert               11 10.9   15.906   15.915   15.906   15.915
 qs_ot_p2m_diag                      83 11.4    0.342    0.389   15.553   15.603
 multiply_cannon_metrocomm3       30084 15.6    0.049    0.051    6.331   15.276
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   14.150   14.152
 sum_up_and_integrate               128 10.3    0.002    0.003   14.045   14.076
 integrate_v_rspace                 128 11.3    0.003    0.004   13.985   14.018
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.888   13.938
 calculate_rho_elec                 128  8.7    0.170    0.186   13.888   13.937
 dbcsr_mm_accdrv_process          62264 16.2    8.547    9.207   12.487   12.998
 dbcsr_complete_redistribute        395 12.7    1.483    1.620    8.933   12.670
 make_images_data                  5014 15.6    0.065    0.072   10.757   12.521
 hybrid_alltoall_any               5200 16.5    0.528    2.190    9.582   11.751
 copy_fm_to_dbcsr                   209 11.7    0.001    0.002    7.558   11.315
 multiply_cannon_sync_h2d         30084 15.6   10.475   11.202   10.475   11.202
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   10.921   10.940
 cp_fm_diag_elpa_base                83 14.4    9.935   10.264   10.913   10.932
 init_scf_run                        11  5.9    0.000    0.001   10.780   10.781
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.780   10.781
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.885   10.127
 transfer_fm_to_dbcsr                11  9.9    0.001    0.007    6.199    9.880
 pw_transfer                       1547 11.6    0.086    0.102    9.306    9.367
 mp_alltoall_i22                    716 14.1    5.454    9.329    5.454    9.329
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    9.079    9.145
 fft_wrap_pw1pw2_140                523 13.2    1.248    1.286    8.058    8.150
 grid_integrate_task_list           128 12.3    7.539    7.854    7.539    7.854
 wfi_extrapolate                     11  7.9    0.001    0.001    7.686    7.687
 cp_fm_cholesky_decompose            22 10.9    7.481    7.570    7.481    7.570
 density_rs2pw                      128  9.7    0.005    0.006    6.913    7.249
 multiply_cannon_metrocomm4       25070 15.6    0.079    0.088    2.820    7.054
 mp_irecv_dv                      76098 16.2    2.666    6.769    2.666    6.769
 calculate_dm_sparse                128  9.5    0.001    0.001    6.251    6.351
 fft3d_ps                          1291 14.7    2.999    3.052    6.128    6.188
 grid_collocate_task_list           128  9.7    5.283    5.691    5.283    5.691
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.453    5.491
 mp_alltoall_d11v                  2415 14.1    4.913    5.379    4.913    5.379
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.495    4.592
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.384    4.466
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    4.449    4.450
 potential_pw2rs                    128 12.3    0.023    0.024    4.312    4.320
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    4.205    4.258
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=187.844000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1079.727273, yerr=20.863587
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430456039424       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1962800054272       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986255912960       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992003932160       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613072052224       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239176077312       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239176077312       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.233020E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.910120E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806383904       0.0%      0.0%    100.0%
 number of processed stacks               1976928       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3442.9
 marketing flops                   145.650931E+12
 -------------------------------------------------------------------------------
 # multiplications                           2529
 max memory usage/rank               1.549238E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  242784
 MPI messages size (bytes):
  total size                         1.341806E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.526748E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              115008              60297314304
   4194304 < size <= 16777216              105840             554906419200
  16777216 < size                           20352             726592466352
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         9010                     51.
 MP_Alltoall          9724                 794507.
 MP_ISend            40420                2100460.
 MP_IRecv            40420                2099564.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4045                  57596.
 MP_Allreduce        11189                   1162.
 MP_Sync                88
 MP_Alltoall          1724               18848034.
 MP_SendRecv          3870                 122880.
 MP_ISendRecv         3870                 122880.
 MP_Wait             16244
 MP_ISend            10760                 423501.
 MP_IRecv            10760                 423501.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.032  169.603  169.604
 qs_mol_dyn_low                       1  2.0    0.003    0.003  169.221  169.235
 qs_forces                           11  3.9    0.004    0.005  169.128  169.130
 qs_energies                         11  4.9    0.002    0.002  161.728  161.734
 scf_env_do_scf                      11  5.9    0.001    0.001  144.711  144.720
 velocity_verlet                     10  3.0    0.002    0.002  111.907  111.911
 scf_env_do_scf_inner_loop          118  6.6    0.005    0.009  109.117  109.119
 dbcsr_multiply_generic            2529 12.6    0.186    0.194   71.952   72.289
 qs_scf_new_mos                     118  7.6    0.001    0.001   71.475   71.568
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   71.474   71.567
 ot_scf_mini                        118  9.6    0.003    0.004   67.024   67.062
 multiply_cannon                   2529 13.6    0.564    0.601   53.372   56.875
 multiply_cannon_loop              2529 14.6    0.809    0.846   50.399   51.061
 ot_mini                            118 10.6    0.001    0.001   37.376   37.407
 init_scf_loop                       11  6.9    0.000    0.000   35.446   35.448
 prepare_preconditioner              11  7.9    0.000    0.000   31.609   31.638
 make_preconditioner                 11  8.9    0.000    0.000   31.609   31.638
 mp_waitall_1                    126876 16.7   24.582   31.095   24.582   31.095
 make_full_inverse_cholesky          11  9.9    0.014    0.025   29.542   29.788
 rebuild_ks_matrix                  129  8.3    0.001    0.001   29.397   29.486
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.019   29.396   29.485
 qs_ks_update_qs_env                129  7.6    0.001    0.001   26.813   26.891
 multiply_cannon_multrec          10116 15.6   10.450   14.945   18.066   21.133
 qs_ot_get_derivative               118 11.6    0.002    0.002   20.357   20.390
 multiply_cannon_metrocomm3       10116 15.6    0.024    0.026   12.516   19.496
 cp_fm_cholesky_invert               11 10.9   18.071   18.076   18.071   18.076
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   17.033   17.147
 apply_single                       129 13.6    0.001    0.001   17.032   17.147
 ot_diis_step                       118 11.6    0.020    0.020   16.948   16.949
 qs_ot_get_p                        129 10.4    0.001    0.001   16.870   16.945
 make_m2s                          5058 13.6    0.065    0.069   14.758   15.656
 make_images                       5058 14.6    2.166    2.606   14.446   15.338
 qs_rho_update_rho_low              129  7.7    0.001    0.001   14.382   14.410
 calculate_rho_elec                 129  8.7    0.255    0.266   14.381   14.409
 sum_up_and_integrate               129 10.3    0.002    0.002   14.216   14.270
 integrate_v_rspace                 129 11.3    0.004    0.004   14.156   14.212
 qs_ot_p2m_diag                      84 11.4    0.502    0.508   13.136   13.152
 cp_dbcsr_syevd                      84 12.4    0.005    0.006   11.963   11.963
 multiply_cannon_sync_h2d         10116 15.6   11.001   11.516   11.001   11.516
 init_scf_run                        11  5.9    0.000    0.001   10.227   10.228
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.227   10.227
 pw_transfer                       1559 11.6    0.086    0.093    9.930    9.966
 hybrid_alltoall_any               5245 16.5    0.847    3.794    8.372    9.880
 make_images_data                  5058 15.6    0.055    0.065    8.534    9.841
 fft_wrap_pw1pw2                   1301 12.7    0.010    0.010    9.706    9.747
 cp_fm_diag_elpa                     84 13.4    0.000    0.000    9.064    9.076
 cp_fm_diag_elpa_base                84 14.4    8.826    8.905    9.061    9.073
 fft_wrap_pw1pw2_140                527 13.2    1.690    1.726    8.619    8.660
 grid_integrate_task_list           129 12.3    7.817    8.095    7.817    8.095
 qs_ot_get_derivative_diag           78 12.4    0.002    0.003    8.010    8.042
 cp_fm_cholesky_decompose            22 10.9    7.921    8.033    7.921    8.033
 dbcsr_mm_accdrv_process          20934 16.1    3.175    4.549    7.264    7.962
 density_rs2pw                      129  9.7    0.005    0.005    6.994    7.459
 multiply_cannon_metrocomm1       10116 15.6    0.030    0.031    4.313    7.434
 wfi_extrapolate                     11  7.9    0.001    0.001    7.231    7.231
 calculate_dm_sparse                129  9.5    0.001    0.001    6.088    6.157
 fft3d_ps                          1301 14.7    3.166    3.263    6.123    6.145
 grid_collocate_task_list           129  9.7    5.604    6.007    5.604    6.007
 dbcsr_complete_redistribute        397 12.7    2.100    2.178    5.109    5.370
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    5.243    5.244
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.072    5.081
 mp_alltoall_d11v                  2429 14.1    4.602    5.037    4.602    5.037
 mp_allgather_i34                  2529 14.6    1.160    4.657    1.160    4.657
 potential_pw2rs                    129 12.3    0.027    0.028    4.336    4.343
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.020    4.080
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.596    3.865
 multiply_cannon_metrocomm4        7587 15.6    0.026    0.029    1.725    3.784
 mp_irecv_dv                      29102 15.9    1.684    3.711    1.684    3.711
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.559    3.622
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    3.578    3.610
 copy_fm_to_dbcsr                   210 11.7    0.002    0.002    3.290    3.592
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.491    3.503
 copy_dbcsr_to_fm                   187 11.8    0.004    0.004    3.382    3.468
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=169.604000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1450.727273, yerr=28.948515
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022950912       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963542011904       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444706349056       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019182452736       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019182452736       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796564E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.606412E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499488       0.0%      0.0%    100.0%
 number of processed stacks               1947808       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3442.6
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               3.177820E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   99400
 MPI messages size (bytes):
  total size                         1.127422E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.342272E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               44768              34745614336
   4194304 < size <= 16777216               43984             376564613120
  16777216 < size                           10032             716108490000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3991                  59293.
 MP_Allreduce        11055                   1504.
 MP_Sync                86
 MP_Alltoall          1700               36954339.
 MP_SendRecv          1778                 218624.
 MP_ISendRecv         1778                 218624.
 MP_Wait              9728
 MP_ISend             6360                1080477.
 MP_IRecv             6360                1080477.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.041    0.059  288.724  288.725
 qs_mol_dyn_low                       1  2.0    0.003    0.003  288.147  288.182
 qs_forces                           11  3.9    0.005    0.005  288.047  288.050
 qs_energies                         11  4.9    0.002    0.002  279.143  279.150
 scf_env_do_scf                      11  5.9    0.001    0.001  256.659  256.668
 velocity_verlet                     10  3.0    0.002    0.002  209.068  209.076
 scf_env_do_scf_inner_loop          116  6.6    0.004    0.009  132.342  132.345
 init_scf_loop                       11  6.9    0.000    0.000  124.047  124.053
 prepare_preconditioner              11  7.9    0.000    0.000  119.199  119.218
 make_preconditioner                 11  8.9    0.000    0.000  119.199  119.218
 make_full_inverse_cholesky          11  9.9    0.038    0.039   95.159  116.362
 qs_scf_new_mos                     116  7.6    0.001    0.001   88.008   88.058
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   88.007   88.057
 ot_scf_mini                        116  9.6    0.004    0.004   83.171   83.199
 dbcsr_multiply_generic            2485 12.5    0.215    0.225   80.740   81.119
 cp_fm_upper_to_full                104 14.8   52.922   75.919   52.922   75.919
 multiply_cannon                   2485 13.5    0.672    0.702   58.397   59.400
 multiply_cannon_loop              2485 14.5    1.032    1.043   54.511   55.661
 ot_mini                            116 10.6    0.001    0.001   43.559   43.588
 dbcsr_complete_redistribute        393 12.7    3.985    4.022   29.965   43.020
 copy_fm_to_dbcsr                   208 11.6    0.001    0.002   26.472   39.488
 transfer_fm_to_dbcsr                11  9.9    0.030    0.030   23.997   36.799
 mp_alltoall_i22                    712 14.1   21.760   34.885   21.760   34.885
 rebuild_ks_matrix                  127  8.3    0.001    0.001   34.419   34.450
 qs_ks_build_kohn_sham_matrix       127  9.3    0.018    0.018   34.418   34.449
 cp_fm_cholesky_invert               11 10.9   33.162   33.168   33.162   33.168
 qs_ks_update_qs_env                127  7.6    0.001    0.001   31.922   31.962
 mp_waitall_1                    102768 16.8   27.411   31.599   27.411   31.599
 qs_ot_get_p                        127 10.4    0.001    0.001   24.755   24.807
 qs_ot_get_derivative               116 11.6    0.002    0.002   24.182   24.212
 qs_ot_p2m_diag                      82 11.4    0.868    0.874   20.827   20.856
 multiply_cannon_metrocomm3        9940 15.5    0.025    0.025   18.829   19.956
 ot_diis_step                       116 11.6    0.021    0.022   19.348   19.348
 make_m2s                          4970 13.5    0.072    0.075   17.857   19.259
 cp_dbcsr_syevd                      82 12.4    0.006    0.006   19.056   19.059
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   18.698   18.821
 apply_single                       127 13.6    0.001    0.001   18.698   18.821
 make_images                       4970 14.5    3.030    3.225   17.379   18.781
 multiply_cannon_multrec           9940 15.5   10.167   12.009   17.911   17.982
 qs_rho_update_rho_low              127  7.7    0.001    0.001   17.154   17.174
 calculate_rho_elec                 127  8.7    0.474    0.475   17.154   17.174
 sum_up_and_integrate               127 10.3    0.002    0.002   16.335   16.424
 integrate_v_rspace                 127 11.3    0.005    0.005   16.273   16.361
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   15.944   15.945
 cp_fm_diag_elpa_base                82 14.4   11.616   13.172   15.941   15.941
 multiply_cannon_sync_h2d          9940 15.5   14.207   14.251   14.207   14.251
 pw_transfer                       1535 11.6    0.095    0.096   13.068   13.074
 fft_wrap_pw1pw2                   1281 12.7    0.011    0.011   12.832   12.837
 init_scf_run                        11  5.9    0.000    0.001   12.172   12.173
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.172   12.173
 hybrid_alltoall_any               5155 16.4    1.307    3.061   10.084   12.117
 make_images_data                  4970 15.5    0.063    0.068    9.888   11.943
 fft_wrap_pw1pw2_140                519 13.2    3.203    3.246   11.433   11.440
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    9.493    9.519
 dbcsr_mm_accdrv_process          20590 16.0    4.244    6.086    7.495    9.326
 wfi_extrapolate                     11  7.9    0.001    0.001    9.032    9.032
 cp_fm_cholesky_decompose            22 10.9    8.843    8.865    8.843    8.865
 grid_integrate_task_list           127 12.3    8.495    8.685    8.495    8.685
 density_rs2pw                      127  9.7    0.005    0.005    8.260    8.316
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    8.170    8.171
 fft3d_ps                          1281 14.7    3.949    3.963    7.323    7.392
 mp_alltoall_d11v                  2401 14.1    6.849    6.982    6.849    6.982
 calculate_dm_sparse                127  9.5    0.001    0.001    6.716    6.777
 grid_collocate_task_list           127  9.7    6.367    6.446    6.367    6.446
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.361    6.415
 copy_dbcsr_to_fm                   185 11.7    0.004    0.004    6.128    6.217
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=288.725000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2891.454545, yerr=157.397796
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.251545E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255646.
 MP_Allreduce         3139                   6114.
 MP_Sync                 4
 MP_Alltoall            54                8020411.
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.036   84.914   84.916
 qs_energies                          1  2.0    0.000    0.001   84.422   84.430
 ls_scf                               1  3.0    0.001    0.002   83.518   83.526
 dbcsr_multiply_generic             111  6.7    0.015    0.015   72.203   72.385
 multiply_cannon                    111  7.7    0.017    0.020   55.651   56.655
 multiply_cannon_loop               111  8.7    0.230    0.251   52.285   53.502
 ls_scf_main                          1  4.0    0.000    0.002   52.058   52.060
 density_matrix_trs4                  2  5.0    0.002    0.003   46.541   46.601
 ls_scf_init_scf                      1  4.0    0.000    0.001   28.414   28.417
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   26.968   27.017
 mp_waitall_1                     11031 10.9   22.165   25.292   22.165   25.292
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   24.903   24.921
 multiply_cannon_multrec           2664  9.7    8.130    8.968   15.504   17.388
 multiply_cannon_sync_h2d          2664  9.7   13.612   16.100   13.612   16.100
 make_m2s                           222  7.7    0.009    0.011   12.912   13.329
 make_images                        222  8.7    0.099    0.110   12.890   13.308
 multiply_cannon_metrocomm1        2664  9.7    0.009    0.010    9.608   12.473
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.011    5.390    8.801
 make_images_data                   222  9.7    0.004    0.005    7.497    8.077
 dbcsr_mm_accdrv_process           4760 10.4    0.592    0.703    6.991    8.005
 hybrid_alltoall_any                227 10.6    0.216    1.842    6.484    7.607
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.198    7.117    6.198    7.117
 calculate_norms                   4752  9.8    5.500    6.198    5.500    6.198
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.973    5.124
 mp_sum_l                           887  5.1    3.109    4.781    3.109    4.781
 multiply_cannon_metrocomm4        2442  9.7    0.012    0.016    2.019    4.210
 mp_irecv_dv                       6231 10.9    2.002    4.178    2.002    4.178
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.363    3.843
 make_images_sizes                  222  9.7    0.000    0.000    0.650    3.656
 mp_alltoall_i44                    222 10.7    0.650    3.656    0.650    3.656
 arnoldi_extremal                     4  6.8    0.000    0.000    3.239    3.270
 arnoldi_normal_ev                    4  7.8    0.001    0.003    3.239    3.270
 build_subspace                      16  8.4    0.009    0.012    3.143    3.145
 ls_scf_post                          1  4.0    0.000    0.001    3.046    3.054
 ls_scf_store_result                  1  5.0    0.000    0.000    2.847    2.897
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.411    2.766
 dbcsr_merge_single_wm              555 10.7    0.457    0.576    2.403    2.758
 make_images_pack                   222  9.7    2.207    2.624    2.209    2.625
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.013    2.310    2.555
 dbcsr_sort_data                    658 11.4    2.203    2.519    2.203    2.519
 dbcsr_matrix_vector_mult_local     304 10.0    2.068    2.463    2.070    2.465
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.313    2.420
 buffer_matrices_ensure_size        222  8.7    1.745    2.058    1.745    2.058
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.777    1.781
 rebuild_ks_matrix                    3  7.3    0.000    0.001    1.766    1.770
 qs_ks_build_kohn_sham_matrix         3  8.3    0.003    0.013    1.766    1.770
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=84.916000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1132.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.137133E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  10075.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.029    0.069   90.078   90.080
 qs_energies                          1  2.0    0.000    0.002   89.545   89.548
 ls_scf                               1  3.0    0.000    0.002   88.200   88.203
 dbcsr_multiply_generic             111  6.7    0.015    0.017   74.421   74.696
 multiply_cannon                    111  7.7    0.028    0.041   52.525   57.578
 ls_scf_main                          1  4.0    0.000    0.001   54.655   54.660
 multiply_cannon_loop               111  8.7    0.135    0.146   49.726   53.311
 density_matrix_trs4                  2  5.0    0.002    0.003   48.928   49.144
 ls_scf_init_scf                      1  4.0    0.000    0.001   29.904   29.906
 mp_waitall_1                      9105 10.9   20.576   29.090   20.576   29.090
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.719   28.801
 multiply_cannon_multrec           1332  9.7   13.342   17.046   22.610   27.402
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.002   26.362   26.374
 multiply_cannon_metrocomm3        1332  9.7    0.007    0.008   11.318   20.389
 make_m2s                           222  7.7    0.006    0.008   15.036   15.577
 make_images                        222  8.7    1.370    1.693   15.006   15.548
 dbcsr_mm_accdrv_process           4041 10.4    0.367    0.539    8.859   10.400
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.377    9.861    8.377    9.861
 hybrid_alltoall_any                227 10.6    0.541    2.548    8.057    9.592
 make_images_data                   222  9.7    0.004    0.005    8.735    9.591
 mp_sum_l                           887  5.1    5.389    8.480    5.389    8.480
 multiply_cannon_metrocomm4        1221  9.7    0.007    0.009    3.192    7.805
 mp_irecv_dv                       3311 11.0    3.171    7.749    3.171    7.749
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.184    6.826
 calculate_norms                   2376  9.8    6.062    6.722    6.062    6.722
 multiply_cannon_sync_h2d          1332  9.7    4.884    5.940    4.884    5.940
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.933    5.160
 arnoldi_extremal                     4  6.8    0.000    0.000    4.605    4.622
 arnoldi_normal_ev                    4  7.8    0.001    0.005    4.605    4.622
 build_subspace                      16  8.4    0.014    0.021    4.353    4.356
 ls_scf_post                          1  4.0    0.000    0.001    3.641    3.645
 ls_scf_store_result                  1  5.0    0.000    0.000    3.367    3.464
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.139    3.362
 dbcsr_matrix_vector_mult_local     304 10.0    2.745    3.226    2.747    3.227
 mp_allgather_i34                   111  8.7    0.796    3.062    0.796    3.062
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.632    2.740
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.169    2.514
 dbcsr_data_new                    4174 10.1    2.114    2.389    2.114    2.389
 make_images_pack                   222  9.7    1.825    2.140    1.828    2.142
 dbcsr_sort_data                    436 11.2    1.840    2.091    1.840    2.091
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.858    1.860
 rebuild_ks_matrix                    3  7.3    0.000    0.001    1.843    1.846
 qs_ks_build_kohn_sham_matrix         3  8.3    0.005    0.013    1.843    1.845
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=90.080000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1799.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.816164E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265448.
 MP_Allreduce         3138                  10896.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.027    0.040   91.700   91.702
 qs_energies                          1  2.0    0.000    0.002   91.100   91.103
 ls_scf                               1  3.0    0.000    0.002   89.661   89.665
 dbcsr_multiply_generic             111  6.7    0.016    0.016   74.358   74.671
 ls_scf_main                          1  4.0    0.000    0.001   55.975   55.979
 multiply_cannon                    111  7.7    0.034    0.079   51.571   55.676
 multiply_cannon_loop               111  8.7    0.116    0.129   48.871   52.680
 density_matrix_trs4                  2  5.0    0.002    0.003   50.012   50.156
 mp_waitall_1                      7281 11.0   23.138   32.976   23.138   32.976
 ls_scf_init_scf                      1  4.0    0.000    0.001   30.000   30.003
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.781   28.873
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.461   26.473
 multiply_cannon_multrec            888  9.7   12.673   15.272   21.212   24.328
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   10.896   22.626
 make_m2s                           222  7.7    0.006    0.007   16.371   17.026
 make_images                        222  8.7    1.582    1.858   16.333   16.989
 make_images_data                   222  9.7    0.004    0.004    9.515   10.455
 hybrid_alltoall_any                227 10.6    0.642    2.951    8.952   10.143
 dbcsr_mm_accdrv_process           3754 10.4    0.311    0.490    8.076    9.280
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.640    8.790    7.640    8.790
 mp_sum_l                           887  5.1    4.710    7.403    4.710    7.403
 multiply_cannon_sync_h2d           888  9.7    6.055    7.387    6.055    7.387
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.462    7.320
 mp_irecv_dv                       2335 11.1    2.446    7.255    2.446    7.255
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.372    6.097
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.556    5.788
 arnoldi_extremal                     4  6.8    0.000    0.000    5.061    5.076
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.061    5.075
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.777    5.010
 calculate_norms                   1584  9.8    4.413    4.850    4.413    4.850
 build_subspace                      16  8.4    0.014    0.020    4.758    4.764
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.427    3.763
 mp_allgather_i34                   111  8.7    0.819    3.711    0.819    3.711
 ls_scf_post                          1  4.0    0.000    0.001    3.686    3.691
 dbcsr_matrix_vector_mult_local     304 10.0    3.024    3.595    3.026    3.597
 ls_scf_store_result                  1  5.0    0.000    0.000    3.411    3.512
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.871    2.983
 dbcsr_data_new                    4116  9.9    2.099    2.445    2.099    2.445
 dbcsr_sort_data                    325 11.1    1.890    2.182    1.890    2.182
 make_images_sizes                  222  9.7    0.000    0.000    1.017    2.123
 mp_alltoall_i44                    222 10.7    1.016    2.122    1.016    2.122
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.896    1.899
 dbcsr_finalize                     304  7.8    0.026    0.032    1.619    1.888
 make_images_pack                   222  9.7    1.625    1.882    1.628    1.885
 rebuild_ks_matrix                    3  7.3    0.000    0.001    1.874    1.877
 qs_ks_build_kohn_sham_matrix         3  8.3    0.002    0.005    1.874    1.877
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=91.702000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2203.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.343684E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  13030.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.028    0.045   97.000   97.001
 qs_energies                          1  2.0    0.000    0.000   96.414   96.419
 ls_scf                               1  3.0    0.000    0.000   94.755   94.761
 dbcsr_multiply_generic             111  6.7    0.016    0.017   78.565   78.815
 ls_scf_main                          1  4.0    0.000    0.000   59.026   59.027
 multiply_cannon                    111  7.7    0.049    0.098   51.590   56.634
 density_matrix_trs4                  2  5.0    0.002    0.003   52.853   52.990
 multiply_cannon_loop               111  8.7    0.151    0.167   46.550   50.112
 ls_scf_init_scf                      1  4.0    0.000    0.000   32.491   32.494
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   31.299   31.361
 mp_waitall_1                      6369 11.0   22.704   29.899   22.704   29.899
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   28.853   28.865
 multiply_cannon_multrec           1332  9.7   14.141   17.630   22.027   25.272
 make_m2s                           222  7.7    0.007    0.008   21.178   22.675
 make_images                        222  8.7    3.151    3.626   21.127   22.626
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.004    9.150   17.644
 make_images_data                   222  9.7    0.004    0.004   11.791   13.539
 hybrid_alltoall_any                227 10.6    0.797    3.767   11.076   12.862
 dbcsr_mm_accdrv_process           3641 10.4    0.315    0.486    7.529    9.038
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.085    8.546    7.085    8.546
 mp_sum_l                           887  5.1    4.215    7.464    4.215    7.464
 multiply_cannon_metrocomm4        1110  9.7    0.005    0.007    2.091    6.043
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.594    5.996
 mp_irecv_dv                       3229 10.9    2.065    5.954    2.065    5.954
 multiply_cannon_sync_h2d          1332  9.7    5.390    5.902    5.390    5.902
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.196    5.699
 arnoldi_extremal                     4  6.8    0.000    0.000    5.193    5.205
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.193    5.205
 build_subspace                      16  8.4    0.014    0.021    4.868    4.875
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.577    4.746
 calculate_norms                   2376  9.8    4.203    4.632    4.203    4.632
 mp_allgather_i34                   111  8.7    2.155    4.502    2.155    4.502
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.021    3.577    3.866
 dbcsr_matrix_vector_mult_local     304 10.0    3.191    3.688    3.193    3.690
 dbcsr_sort_data                    658 11.4    3.011    3.379    3.011    3.379
 ls_scf_post                          1  4.0    0.000    0.000    3.238    3.242
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.750    3.223
 dbcsr_merge_single_wm              555 10.7    0.529    0.661    2.741    3.214
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.032    3.083
 ls_scf_store_result                  1  5.0    0.000    0.000    2.981    3.040
 dbcsr_data_release               10477 10.7    1.580    2.422    1.580    2.422
 dbcsr_finalize                     304  7.8    0.049    0.061    1.798    1.972
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=97.001000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2693.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.724277E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265536.
 MP_Allreduce         3129                  15263.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.035    0.054   92.122   92.123
 qs_energies                          1  2.0    0.000    0.000   91.376   91.379
 ls_scf                               1  3.0    0.000    0.000   89.353   89.355
 dbcsr_multiply_generic             111  6.7    0.017    0.018   71.015   71.240
 ls_scf_main                          1  4.0    0.000    0.000   56.807   56.807
 multiply_cannon                    111  7.7    0.105    0.174   53.093   56.589
 multiply_cannon_loop               111  8.7    0.088    0.094   50.507   51.705
 density_matrix_trs4                  2  5.0    0.002    0.003   49.808   49.874
 ls_scf_init_scf                      1  4.0    0.000    0.000   29.165   29.168
 mp_waitall_1                      5436 11.0   24.861   28.819   24.861   28.819
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   27.919   27.953
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   25.828   25.841
 multiply_cannon_multrec            444  9.7   13.611   16.663   20.675   22.210
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   10.984   15.945
 make_m2s                           222  7.7    0.005    0.005   13.390   14.326
 make_images                        222  8.7    2.039    2.487   13.323   14.259
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001    6.537   13.187
 make_images_data                   222  9.7    0.003    0.004    8.161    9.646
 hybrid_alltoall_any                227 10.6    0.803    3.809    8.075    9.559
 multiply_cannon_sync_h2d           444  9.7    6.804    8.029    6.804    8.029
 dbcsr_mm_accdrv_process           3003 10.4    0.355    0.432    6.759    7.886
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.391    7.488    6.391    7.488
 arnoldi_extremal                     4  6.8    0.000    0.000    5.774    5.785
 arnoldi_normal_ev                    4  7.8    0.002    0.005    5.774    5.785
 build_subspace                      16  8.4    0.015    0.020    5.375    5.383
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.428    4.678
 dbcsr_matrix_vector_mult           304  9.0    0.011    0.021    4.155    4.354
 dbcsr_matrix_vector_mult_local     304 10.0    3.697    4.165    3.699    4.168
 mp_sum_l                           887  5.1    2.517    3.906    2.517    3.906
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.553    3.878
 mp_irecv_dv                       1241 11.2    1.537    3.847    1.537    3.847
 mp_allgather_i34                   111  8.7    1.149    3.704    1.149    3.704
 calculate_norms                    792  9.8    3.618    3.694    3.618    3.694
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.546    3.628
 ls_scf_post                          1  4.0    0.000    0.000    3.380    3.383
 ls_scf_store_result                  1  5.0    0.000    0.000    3.172    3.205
 make_images_sizes                  222  9.7    0.000    0.000    0.846    3.108
 mp_alltoall_i44                    222 10.7    0.846    3.108    0.846    3.108
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    1.763    2.922
 dbcsr_data_new                    4608  9.7    1.779    2.305    1.779    2.305
 dbcsr_finalize                     304  7.8    0.062    0.077    2.204    2.290
 dbcsr_merge_all                    275  8.9    0.482    0.527    2.063    2.153
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.041    2.042
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.008    2.009
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    2.008    2.009
 qs_energies_init_hamiltonians        1  3.0    0.001    0.001    2.007    2.007
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=92.123000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3730.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.867426E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284089.
 MP_Allreduce         3123                  21388.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.075    0.095  106.110  106.110
 qs_energies                          1  2.0    0.000    0.000  104.784  104.791
 ls_scf                               1  3.0    0.000    0.000  101.815  101.821
 dbcsr_multiply_generic             111  6.7    0.024    0.028   75.498   75.625
 ls_scf_main                          1  4.0    0.000    0.000   63.402   63.403
 density_matrix_trs4                  2  5.0    0.002    0.003   54.382   54.444
 multiply_cannon                    111  7.7    0.110    0.165   49.153   51.129
 multiply_cannon_loop               111  8.7    0.098    0.101   46.194   46.909
 ls_scf_init_scf                      1  4.0    0.000    0.000   34.621   34.622
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   33.058   33.085
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   30.350   30.356
 mp_waitall_1                      4527 11.1   21.701   25.303   21.701   25.303
 make_m2s                           222  7.7    0.005    0.005   22.651   23.726
 make_images                        222  8.7    3.575    3.879   22.544   23.618
 multiply_cannon_multrec            444  9.7   17.837   18.430   22.481   23.055
 hybrid_alltoall_any                227 10.6    1.654    3.611   12.789   15.791
 make_images_data                   222  9.7    0.003    0.004   13.049   15.173
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.047   11.529
 multiply_cannon_sync_h2d           444  9.7    8.789    8.821    8.789    8.821
 arnoldi_extremal                     4  6.8    0.000    0.000    7.345    7.352
 arnoldi_normal_ev                    4  7.8    0.003    0.009    7.345    7.352
 build_subspace                      16  8.4    0.026    0.036    6.795    6.806
 dbcsr_matrix_vector_mult           304  9.0    0.017    0.034    5.436    5.587
 dbcsr_matrix_vector_mult_local     304 10.0    5.030    5.334    5.032    5.337
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    5.165    5.260
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.860    5.139
 dbcsr_mm_accdrv_process           1814 10.4    0.274    0.385    4.462    4.622
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.108    4.237    4.108    4.237
 ls_scf_post                          1  4.0    0.000    0.000    3.791    3.799
 make_images_sizes                  222  9.7    0.000    0.000    1.467    3.610
 mp_alltoall_i44                    222 10.7    1.467    3.609    1.467    3.609
 ls_scf_store_result                  1  5.0    0.000    0.000    3.511    3.550
 mp_allgather_i34                   111  8.7    1.082    3.473    1.082    3.473
 calculate_norms                    792  9.8    3.231    3.271    3.231    3.271
 dbcsr_finalize                     304  7.8    0.082    0.089    3.071    3.165
 dbcsr_merge_all                    275  8.9    0.884    0.916    2.856    2.945
 qs_energies_init_hamiltonians        1  3.0    0.001    0.001    2.939    2.939
 dbcsr_complete_redistribute          5  7.6    1.439    1.506    2.746    2.876
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.402    2.516
 dbcsr_sort_data                    325 11.1    2.437    2.498    2.437    2.498
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.461    2.463
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.394    2.396
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    2.394    2.396
 dbcsr_data_new                    6591  9.6    1.895    2.355    1.895    2.355
 dbcsr_new_transposed                 4  7.5    0.242    0.253    2.268    2.280
 dbcsr_frobenius_norm                74  6.6    2.056    2.127    2.200    2.238
 dbcsr_add_d                        103  6.2    0.000    0.000    2.124    2.199
 dbcsr_add_anytype                  103  7.2    0.858    0.890    2.123    2.198
 dbcsr_data_release               12724 10.6    1.976    2.195    1.976    2.195
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=106.110000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=7018.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/1aa3ad7d27726d25ce9c9edce445974c62615587_performance_tests/27/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32        7009386627072       0.0%      0.0%    100.0%
 flops     9 x     9 x    32        7335108845568       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        9866241589248       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        9884108906496       0.0%      0.0%    100.0%
 flops    22 x    22 x    32       13354440523776       0.0%      0.0%    100.0%
 flops    32 x    32 x     9       20607185977344       0.0%      0.0%    100.0%
 flops    32 x    32 x    22       25186560638976       0.0%      0.0%    100.0%
 flops     9 x    32 x    32       28458319085568       0.0%      0.0%    100.0%
 flops    22 x    32 x    32       34782389993472       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       42881542373376       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       55680402235392       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       55680402235392       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       72328573419520       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       383.054662E+12       0.0%      0.0%    100.0%
 flops max/rank                    733.641090E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                        26899403712       0.0%      0.0%    100.0%
 number of processed stacks             118860288       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     226.3
 marketing flops                   780.439111E+12
 -------------------------------------------------------------------------------
 # multiplications                           1445
 max memory usage/rank             580.390912E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged               102097920
 MPI messages size (bytes):
  total size                        37.227590E+12
  min size                           0.000000E+00
  max size                           4.551360E+06
  average size                     364.626312E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              731472                        0
       128 < size <=     8192            11922720              97670922240
      8192 < size <=    32768            24718992             614677610496
     32768 < size <=   131072            20000256            1970081366016
    131072 < size <=  4194304            42515668           24886801223040
   4194304 < size <= 16777216             2208812            9656099886720
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4640                  78072.
 MP_Allreduce        13232                   2081.
 MP_Sync              1064
 MP_Alltoall          2588                3723339.
 MP_SendRecv        168740                  11136.
 MP_ISendRecv        92040                  11136.
 MP_Wait            102830
 MP_comm_split          40
 MP_ISend            26090                  85106.
 MP_IRecv            37890                  59644.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.019    0.066  224.276  224.280
 qs_mol_dyn_low                       1  2.0    0.004    0.014  223.160  223.183
 qs_forces                            5  3.8    0.004    0.005  223.077  223.081
 qs_energies                          5  4.8    0.003    0.005  220.210  220.233
 scf_env_do_scf                       5  5.8    0.000    0.001  206.311  206.315
 scf_env_do_scf_inner_loop          105  6.6    0.002    0.006  180.243  180.246
 qs_scf_new_mos                     105  7.6    0.000    0.001  141.469  141.658
 qs_scf_loop_do_ot                  105  8.6    0.001    0.001  141.469  141.658
 dbcsr_multiply_generic            1445 12.2    0.129    0.138  132.681  133.460
 ot_scf_mini                        105  9.6    0.003    0.003  131.579  131.731
 multiply_cannon                   1445 13.2    0.273    0.285  113.882  116.114
 multiply_cannon_loop              1445 14.2    2.840    2.983  112.264  114.662
 velocity_verlet                      4  3.0    0.002    0.020  105.650  105.663
 ot_mini                            105 10.6    0.001    0.001   60.193   60.291
 multiply_cannon_multrec          69360 15.2   29.766   34.781   39.655   44.671
 mp_waitall_1                    488190 16.1   34.871   42.464   34.871   42.464
 qs_ot_get_p                        112 10.4    0.001    0.001   41.404   41.736
 qs_ot_get_derivative                55 11.6    0.001    0.001   38.542   38.658
 multiply_cannon_sync_h2d         69360 15.2   29.108   34.184   29.108   34.184
 multiply_cannon_metrocomm3       69360 15.2    0.198    0.210   25.900   33.644
 qs_ot_p2m_diag                      40 11.0    0.020    0.030   30.399   30.541
 rebuild_ks_matrix                  110  8.4    0.000    0.000   28.280   28.435
 qs_ks_build_kohn_sham_matrix       110  9.4    0.011    0.013   28.280   28.435
 cp_dbcsr_syevd                      40 12.0    0.002    0.002   27.227   27.227
 qs_ks_update_qs_env                112  7.6    0.001    0.001   25.984   26.131
 init_scf_loop                        7  6.6    0.000    0.000   26.039   26.040
 apply_preconditioner_dbcsr          62 12.6    0.000    0.000   22.955   23.243
 apply_single                        62 13.6    0.000    0.000   22.955   23.243
 cp_fm_syevd                         40 13.0    0.000    0.001   22.220   22.350
 prepare_preconditioner               7  7.6    0.000    0.000   21.311   21.342
 make_preconditioner                  7  8.6    0.000    0.000   21.311   21.342
 ot_new_cg_direction                 55 11.6    0.001    0.001   20.951   20.951
 cp_fm_redistribute_end              40 14.0    8.783   17.527    8.787   17.528
 cp_fm_syevd_base                    40 14.0    8.734   17.481    8.734   17.481
 qs_rho_update_rho_low              110  7.6    0.000    0.001   15.945   16.280
 calculate_rho_elec                 110  8.6    0.029    0.032   15.944   16.280
 qs_ot_get_orbitals                 105 10.6    0.001    0.001   14.858   15.034
 make_full_inverse_cholesky           7  9.6    0.000    0.000   14.396   14.465
 qs_ot_get_derivative_taylor         37 12.8    0.001    0.001   13.960   14.061
 mp_sum_l                          4764 12.2   12.273   13.195   12.273   13.195
 calculate_dm_sparse                110  9.5    0.000    0.000   11.496   11.665
 pw_transfer                       1645 12.4    0.079    0.097   11.262   11.424
 init_scf_run                         5  5.8    0.000    0.000   11.312   11.313
 scf_env_initial_rho_setup            5  6.8    0.002    0.003   11.312   11.313
 fft_wrap_pw1pw2                   1425 13.5    0.012    0.014   11.123   11.292
 density_rs2pw                      110  9.6    0.005    0.006   10.398   10.796
 dbcsr_mm_accdrv_process         154766 15.8    6.216    6.438    9.757   10.575
 qs_ot_get_derivative_diag           18 12.0    0.000    0.001   10.270   10.343
 qs_vxc_create                      110 10.4    0.002    0.003   10.188   10.219
 cp_fm_cholesky_invert                7 10.6   10.160   10.168   10.160   10.168
 fft_wrap_pw1pw2_240                915 15.0    0.837    0.921    9.778    9.966
 multiply_cannon_metrocomm1       69360 15.2    0.095    0.101    4.912    9.656
 check_diag                          80 13.5    8.684    8.934    9.378    9.508
 sum_up_and_integrate                60 10.3    0.001    0.002    8.157    8.167
 integrate_v_rspace                  60 11.3    0.001    0.002    8.140    8.151
 fft3d_pb                           915 16.0    2.395    2.591    7.955    8.117
 transfer_rs2pw                     445 10.6    0.007    0.008    7.227    7.654
 acc_transpose_blocks             69360 15.2    0.349    0.369    6.764    7.568
 xc_rho_set_and_dset_create         110 12.4    0.075    0.095    7.218    7.459
 cp_dbcsr_sm_fm_multiply             15  9.3    0.001    0.001    6.949    6.965
 xc_vxc_pw_create                    60 11.3    0.038    0.049    6.800    6.831
 make_full_single_inverse             7  9.6    0.001    0.001    6.651    6.683
 make_m2s                          2890 13.2    0.078    0.088    6.046    6.630
 cp_dbcsr_sm_fm_multiply_core        15 10.3    0.000    0.000    6.528    6.608
 calculate_first_density_matrix       1  7.0    0.000    0.001    6.544    6.566
 make_images                       2890 14.2    0.237    0.258    5.938    6.522
 xc_pw_derive                       510 13.4    0.005    0.006    5.996    6.065
 mp_alltoall_z22v                  2340 17.7    4.943    5.228    4.943    5.228
 mp_waitany                        7680 13.5    4.304    4.775    4.304    4.775
 acc_transpose_blocks_kernels     69360 16.2    0.850    0.888    4.042    4.740
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="601", plot="h2o_512_md", label="(64n/12r/1t)", y=224.280000, yerr=0.000000
PlotPoint: name="602", plot="h2o_512_md_mem", label="(64n/12r/1t)", y=552.800000, yerr=1.939072
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: 1aa3ad7d27726d25ce9c9edce445974c62615587
Summary: empty
Status: OK