=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: 523d85c309ba47d334e69c8546862257e289ce38


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1, Cray-FFTW 3.3.8.10,
#              COSMA 2.6.2, ELPA 2022.11.001, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.0.0, LIBVORI 220621, LIBXSMM 1.17, PLUMED 2.8.1,
#              SIRIUS 7.3.2, SPGLIB 1.16.2
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed by default.
#        Replace or adapt the "module add" commands below if needed.
#        An optional first argument selects the GCC module version
#        (default: 9.3.0).
#
# Author: Matthias Krack (26.12.2022)
#
# The section below is read differently by make and by bash:
#  - make joins the comment line that follows this note with every subsequent
#    line ending in a backslash, so the whole section up to "return" is one
#    single comment for make;
#  - bash ends that comment at its line end and runs the joined lines after it
#    as one command list when this file is sourced.
# Hence every line of the section except "return" has to keep its trailing
# backslash and no comment line may be inserted into the section.
# The toolchain installer is run in a subshell, so that a failing "cd" cannot
# leave the calling shell in a different directory.
# maxtasks is not assigned in this file; its value comes from the calling shell.
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   gcc_version="${1:-9.3.0}"; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   (cd tools/toolchain && ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j${maxtasks} --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed); \
   printf "Sourcing %s ... " "${PWD}/tools/toolchain/install/setup"; \
   source "${PWD}/tools/toolchain/install/setup"; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   return

# General switches
DO_CHECKS := no
USE_ACC := yes

# Package selection: each value is the version string that is used to build
# the package path below INSTALL_PATH. A package whose variable is empty or
# undefined (see USE_QUIP) is skipped by its "ifneq" section in this file.
USE_COSMA := 2.6.2
USE_ELPA := 2022.11.001
USE_LIBINT := 2.6.0
USE_LIBPEXSI := 1.2.0
USE_LIBVORI := 220621
USE_LIBXC := 6.0.0
USE_LIBXSMM := 1.17
USE_PLUMED := 2.8.1
#USE_QUIP := 0.9.10
USE_SIRIUS := 7.3.2
USE_SPGLIB := 1.16.2

# Versions of the packages which are only needed for SIRIUS
HDF5_VER := 1.12.0
LIBVDWXC_VER := 0.4.0
SPFFT_VER := 1.0.6
SPLA_VER := 1.5.4

# Versions of the packages which are only needed for LIBPEXSI
SCOTCH_VER := 6.0.0
SUPERLU_VER := 6.1.0

# LMAX is part of the LIBINT installation path, MAX_CONTR defines __MAX_CONTR
LMAX := 5
MAX_CONTR := 4

GPUVER := P100
OFFLOAD_TARGET := cuda

# Compiler, linker, and archiver commands
CC := cc
CXX := CC
FC := ftn
LD := ftn
OFFLOAD_CC := nvcc
AR := ar -r

# No -march flag is given here, because cc, CC, and ftn already include the
# proper one
CFLAGS := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g

DFLAGS := -D__parallel -D__SCALAPACK -D__FFTW3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# NOTE(review): presumably PWD (and not CURDIR) is used on purpose, so that
# the path stays the same when make is re-invoked in another directory - confirm
INSTALL_PATH := $(PWD)/tools/toolchain/install

# DO_CHECKS=yes adds the __CHECK_DIAG definition
ifeq ($(DO_CHECKS),yes)
   DFLAGS += -D__CHECK_DIAG
endif

# USE_ACC=yes adds the definitions __DBCSR_ACC and __OFFLOAD_CUDA.
# __NO_OFFLOAD_PW is added as well, since there is possibly no performance
# gain with PW_CUDA currently.
ifeq ($(USE_ACC),yes)
   DFLAGS += -D__DBCSR_ACC -D__OFFLOAD_CUDA -D__NO_OFFLOAD_PW
endif

# PLUMED: defines __PLUMED2, links the static PLUMED library, and sets USE_GSL
# which activates the GSL section of this file
ifneq ($(USE_PLUMED),)
   USE_PLUMED := $(strip $(USE_PLUMED))
   USE_GSL := 2.7
   PLUMED_LIB := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   LIBS += $(PLUMED_LIB)/libplumed.a
   DFLAGS += -D__PLUMED2
endif

# ELPA: TARGET names the subdirectory of the ELPA installation which is used
# for the include and library paths; for "nvidia" __ELPA_NVIDIA_GPU is defined
# in addition to __ELPA
ifneq ($(USE_ELPA),)
   USE_ELPA := $(strip $(USE_ELPA))
   TARGET := nvidia
   ELPA_LIB := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   ELPA_INC := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   CFLAGS += $(addprefix -I$(ELPA_INC)/,elpa modules)
   DFLAGS += -D__ELPA
   ifeq ($(TARGET),nvidia)
      DFLAGS += -D__ELPA_NVIDIA_GPU
   endif
   LIBS += $(ELPA_LIB)/libelpa.a
endif

# QUIP: adds the include path, defines __QUIP, and links the static QUIP,
# libatoms, and FoX libraries in the order given below
ifneq ($(USE_QUIP),)
   USE_QUIP := $(strip $(USE_QUIP))
   QUIP_LIB := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   QUIP_INC := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   DFLAGS += -D__QUIP
   CFLAGS += -I$(QUIP_INC)
   LIBS += $(addprefix $(QUIP_LIB)/,libquip_core.a libatoms.a \
             libFoX_sax.a libFoX_common.a libFoX_utils.a libFoX_fsys.a)
endif

# LIBPEXSI together with SuperLU_DIST and SCOTCH: adds the three include
# paths, defines __LIBPEXSI, and links the static libraries in the order
# PEXSI, SuperLU_DIST, SCOTCH
ifneq ($(USE_LIBPEXSI),)
   USE_LIBPEXSI := $(strip $(USE_LIBPEXSI))
   SCOTCH_VER := $(strip $(SCOTCH_VER))
   SUPERLU_VER := $(strip $(SUPERLU_VER))
   LIBPEXSI_LIB := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
   LIBPEXSI_INC := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   SUPERLU_LIB := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib
   SUPERLU_INC := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
   SCOTCH_LIB := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib
   SCOTCH_INC := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   DFLAGS += -D__LIBPEXSI
   CFLAGS += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC)
   LIBS += $(LIBPEXSI_LIB)/libpexsi.a
   LIBS += $(SUPERLU_LIB)/libsuperlu_dist.a
   LIBS += $(addprefix $(SCOTCH_LIB)/,libptscotchparmetis.a libptscotch.a \
             libptscotcherr.a libscotchmetis.a libscotch.a)
endif

# LIBVORI: defines __LIBVORI and links the static library
ifneq ($(USE_LIBVORI),)
   USE_LIBVORI := $(strip $(USE_LIBVORI))
   LIBVORI_LIB := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   LIBS += $(LIBVORI_LIB)/libvori.a
   DFLAGS += -D__LIBVORI
endif

# LIBXC: adds the include path, defines __LIBXC, and links libxcf03.a before
# libxc.a
ifneq ($(USE_LIBXC),)
   USE_LIBXC := $(strip $(USE_LIBXC))
   LIBXC_LIB := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   LIBXC_INC := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   DFLAGS += -D__LIBXC
   CFLAGS += -I$(LIBXC_INC)
   LIBS += $(addprefix $(LIBXC_LIB)/,libxcf03.a libxc.a)
endif

# LIBINT: the installation directory name contains both the LIBINT version
# and LMAX; adds the include path, defines __LIBINT, and links libint2.a
ifneq ($(USE_LIBINT),)
   LMAX := $(strip $(LMAX))
   USE_LIBINT := $(strip $(USE_LIBINT))
   LIBINT_LIB := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   LIBINT_INC := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   DFLAGS += -D__LIBINT
   CFLAGS += -I$(LIBINT_INC)
   LIBS += $(LIBINT_LIB)/libint2.a
endif

# SPGLIB: adds the include path, defines __SPGLIB, and links libsymspg.a
ifneq ($(USE_SPGLIB),)
   USE_SPGLIB := $(strip $(USE_SPGLIB))
   SPGLIB_LIB := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   SPGLIB_INC := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   DFLAGS += -D__SPGLIB
   CFLAGS += -I$(SPGLIB_INC)
   LIBS += $(SPGLIB_LIB)/libsymspg.a
endif

# LIBXSMM: adds the include path, defines __LIBXSMM, and links libxsmmf.a
# before libxsmm.a
ifneq ($(USE_LIBXSMM),)
   USE_LIBXSMM := $(strip $(USE_LIBXSMM))
   LIBXSMM_LIB := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   LIBXSMM_INC := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   DFLAGS += -D__LIBXSMM
   CFLAGS += -I$(LIBXSMM_INC)
   LIBS += $(addprefix $(LIBXSMM_LIB)/,libxsmmf.a libxsmm.a)
endif

# SIRIUS together with the packages used along with it here: HDF5, libvdwxc,
# SpFFT, and SpLA. With USE_ACC=yes the "cuda" subdirectories of the SpFFT,
# SpLA, and SIRIUS installations are selected and __OFFLOAD_GEMM is defined.
ifneq ($(USE_SIRIUS),)
   USE_SIRIUS := $(strip $(USE_SIRIUS))
   HDF5_VER := $(strip $(HDF5_VER))
   LIBVDWXC_VER := $(strip $(LIBVDWXC_VER))
   SPFFT_VER := $(strip $(SPFFT_VER))
   SPLA_VER := $(strip $(SPLA_VER))
   HDF5_LIB := $(INSTALL_PATH)/hdf5-$(HDF5_VER)/lib
   LIBVDWXC_INC := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SPFFT_INC := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
   SPLA_INC := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla
   # Paths of the build without GPU support are the starting point ...
   SPFFT_LIB := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
   SPLA_LIB := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
   SIRIUS_INC := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
   SIRIUS_LIB := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
   # ... and are extended by "/cuda" if the accelerator is used
   ifeq ($(USE_ACC),yes)
      DFLAGS += -D__OFFLOAD_GEMM
      SPFFT_LIB := $(SPFFT_LIB)/cuda
      SPLA_LIB := $(SPLA_LIB)/cuda
      SIRIUS_INC := $(SIRIUS_INC)/cuda
      SIRIUS_LIB := $(SIRIUS_LIB)/cuda
   endif
   CFLAGS += -I$(LIBVDWXC_INC) -I$(SPFFT_INC) -I$(SPLA_INC) -I$(SIRIUS_INC)
   DFLAGS += -D__HDF5 -D__LIBVDWXC -D__SPFFT -D__SPLA -D__SIRIUS
   LIBS += $(SIRIUS_LIB)/libsirius.a $(SPLA_LIB)/libspla.a $(SPFFT_LIB)/libspfft.a \
           $(LIBVDWXC_LIB)/libvdwxc.a $(HDF5_LIB)/libhdf5.a
endif

# COSMA: with USE_ACC=yes the suffix "-cuda" is appended to the version, i.e.
# the installation directory COSMA-<version>-cuda is used. Adds the include
# path, defines __COSMA, and links the static libraries in the order below.
ifneq ($(USE_COSMA),)
   USE_COSMA := $(strip $(USE_COSMA))
   ifeq ($(USE_ACC),yes)
      USE_COSMA := $(USE_COSMA)-cuda
   endif
   COSMA_LIB := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   COSMA_INC := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   DFLAGS += -D__COSMA
   CFLAGS += -I$(COSMA_INC)
   LIBS += $(addprefix $(COSMA_LIB)/,libcosma_prefixed_pxgemm.a libcosma.a \
             libcosta_prefixed_scalapack.a libcosta.a libTiled-MM.a)
endif

# GSL: USE_GSL is set by the PLUMED section of this file. Adds the include
# path, defines __GSL, and links libgsl.a
ifneq ($(USE_GSL),)
   USE_GSL := $(strip $(USE_GSL))
   GSL_LIB := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   GSL_INC := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   DFLAGS += -D__GSL
   CFLAGS += -I$(GSL_INC)
   LIBS += $(GSL_LIB)/libgsl.a
endif

# Final assembly of the compiler and linker flags from the settings made so far

# All preprocessor definitions are passed to the C compiler
CFLAGS += $(DFLAGS)

# The C++ and the Fortran flags start from the complete C flags
CXXFLAGS := $(CFLAGS) -std=c++11

OFFLOAD_FLAGS := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

FCFLAGS := $(CFLAGS)
# -fallow-argument-mismatch is only added if the major version printed by
# "gcc -dumpversion" is greater than 9. The version is read with one single
# shell call; a missing gcc or an empty answer is treated like version 0, so
# that no error message of the test command shows up while make parses this
# file.
ifeq ($(shell v=$$(gcc -dumpversion 2>/dev/null | cut -d. -f1); [ "$${v:-0}" -gt 9 ] 2>/dev/null && echo yes),yes)
   FCFLAGS += -fallow-argument-mismatch
endif
FCFLAGS += -fbacktrace -ffree-form -ffree-line-length-none -fno-omit-frame-pointer -std=f2008

# The linker gets the Fortran flags; the CUDA library directory is added as
# search path and as run path only if CUDA_HOME is set
LDFLAGS := $(FCFLAGS)
ifneq ($(CUDA_HOME),)
   CUDA_LIB := $(CUDA_HOME)/lib64
   LDFLAGS += -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA libraries followed by the system libraries
LIBS += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt
LIBS += -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/01
 job id: 44060912
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/02
 job id: 44060915
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/03
 job id: 44060918
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/04
 job id: 44060919
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/05
 job id: 44060920
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/06
 job id: 44060921
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/07
 job id: 44060922
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/08
 job id: 44060923
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/09
 job id: 44060924
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/10
 job id: 44060925
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/11
 job id: 44060926
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/12
 job id: 44060927
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/13
 job id: 44060929
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/14
 job id: 44060930
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/15
 job id: 44060932
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/16
 job id: 44060933
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/17
 job id: 44060934
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/18
 job id: 44060935
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/19
 job id: 44060936
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/20
 job id: 44060937
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/21
 job id: 44060938
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/22
 job id: 44060939
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/23
 job id: 44060940
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/24
 job id: 44060941
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/25
 job id: 44060942
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/26
 job id: 44060943
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          344                      9.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.035    0.036  135.335  135.336
 farming_run                          1  2.0  133.900  133.901  135.295  135.296
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.467036E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              228                1113141.
 MP_Allreduce          485                2282278.
 MP_Sync                27
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split           8
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.057  114.558  114.589
 qs_energies                          1  2.0    0.000    0.000  114.319  114.321
 mp2_main                             1  3.0    0.000    0.000  112.477  112.479
 mp2_gpw_main                         1  4.0    0.020    0.026  111.667  111.669
 mp2_ri_gpw_compute_in                1  5.0    0.172    0.173   93.159   93.277
 mp2_ri_gpw_compute_in_loop           1  6.0    0.005    0.005   55.419   55.537
 mp2_eri_3c_integrate_gpw           272  7.0    0.154    0.170   41.622   46.582
 get_2c_integrals                     1  6.0    0.000    0.000   37.097   37.566
 integrate_v_rspace                 273  8.0    0.437    0.451   25.060   29.677
 pw_transfer                       6555 10.6    0.377    0.390   27.286   27.531
 fft_wrap_pw1pw2                   5465 11.4    0.045    0.048   25.951   26.165
 grid_integrate_task_list           273  9.0   20.890   25.942   20.890   25.942
 fft_wrap_pw1pw2_100               2178 12.4    1.181    1.272   23.512   23.766
 compute_2c_integrals                 1  7.0    0.038    0.060   19.452   19.453
 compute_2c_integrals_loop_lm         1  8.0    0.003    0.004   18.851   18.952
 mp2_eri_2c_integrate_gpw             1  9.0    2.391    2.429   18.848   18.948
 rpa_ri_compute_en                    1  5.0    0.001    0.001   18.400   18.484
 cp_fm_cholesky_decompose            12  8.2   17.585   18.060   17.585   18.060
 cholesky_decomp                      1  7.0    0.000    0.000   16.498   16.962
 fft3d_s                           5443 13.4   16.095   16.336   16.117   16.358
 ao_to_mo_and_store_B_mult_1        272  7.0   10.846   15.541   10.846   15.541
 calculate_wavefunction             272  8.0    5.380    5.460   12.477   13.098
 rpa_num_int                          1  6.0    0.000    0.004   10.502   10.511
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.473   10.500
 calc_mat_Q                           8  8.0    0.000    0.000    9.333    9.436
 contract_S_to_Q                      8  9.0    0.000    0.000    8.756    8.861
 calc_potential_gpw                 544  9.5    0.005    0.006    8.224    8.618
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.002    8.193    8.440
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.330    8.419
 parallel_gemm_fm_cosma              14 10.1    8.330    8.419    8.330    8.419
 potential_pw2rs                    545 10.0    0.107    0.109    7.680    8.368
 create_integ_mat                     1  6.0    0.014    0.027    7.684    7.693
 collocate_single_gaussian          272 10.0    0.040    0.042    7.446    7.692
 array2fm                             1  7.0    0.000    0.000    6.627    7.142
 pw_scatter_s                      2720 13.7    4.438    4.616    4.438    4.616
 pw_gather_s                       2722 13.2    3.867    4.233    3.867    4.233
 array2fm_buffer_send                 1  8.0    2.923    3.146    2.923    3.146
 ao_to_mo_and_store_B_E_Ex_1        272  7.0    1.221    1.367    2.173    2.360
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=111.669496, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2739.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          344                     10.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.027    0.036  399.280  399.281
 farming_run                          1  2.0  398.489  398.494  399.245  399.247
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827141120       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788822       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.224733E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              703                 408373.
 MP_Allreduce         1821                  23730.
 MP_Sync                38
 MP_Alltoall            77               14123339.
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.045  209.526  209.526
 qs_energies                          1  2.0    0.000    0.000  209.266  209.282
 scf_env_do_scf                       1  3.0    0.000    0.000  106.450  106.450
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  105.602  105.611
 rebuild_ks_matrix                    4  6.0    0.000    0.000  105.601  105.610
 qs_ks_build_kohn_sham_matrix         4  7.0    0.054    0.063  105.601  105.610
 hfx_ks_matrix                        4  8.0    0.001    0.001  105.232  105.235
 integrate_four_center                4  9.0    0.143    0.455  105.231  105.234
 mp2_main                             1  3.0    0.000    0.000  102.536  102.552
 mp2_gpw_main                         1  4.0    0.032    0.044  101.705  101.721
 integrate_four_center_main           4 10.0    0.129    0.605   96.815   99.297
 integrate_four_center_bin          269 11.0   96.686   99.229   96.686   99.229
 init_scf_loop                        1  4.0    0.000    0.000   91.994   91.994
 mp2_ri_gpw_compute_in                1  5.0    0.064    0.064   74.981   76.086
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.002   54.551   55.655
 mp2_eri_3c_integrate_gpw            91  7.0    0.145    0.159   42.015   47.311
 integrate_v_rspace                  95  8.0    0.399    0.565   28.382   33.507
 pw_transfer                       2240 10.6    0.145    0.181   29.967   30.502
 fft_wrap_pw1pw2                   1868 11.4    0.018    0.022   28.943   29.503
 grid_integrate_task_list            95  9.0   23.672   28.908   23.672   28.908
 ao_to_mo_and_store_B_mult_1         91  7.0   10.847   28.461   10.847   28.461
 mp2_ri_gpw_compute_en                1  5.0    0.060    0.073   26.578   28.295
 fft_wrap_pw1pw2_100                730 12.4    1.321    1.488   26.682   27.226
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.846    1.902   24.825   24.835
 get_2c_integrals                     1  6.0    0.000    0.000   20.343   20.365
 compute_2c_integrals                 1  7.0    0.002    0.003   19.327   19.330
 compute_2c_integrals_loop_lm         1  8.0    0.001    0.002   19.003   19.200
 mp2_eri_2c_integrate_gpw             1  9.0    1.723    1.865   19.002   19.198
 fft3d_s                           1823 13.4   18.429   18.709   18.442   18.723
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.000   14.455   14.455
 calculate_wavefunction              91  8.0    2.011    2.040    9.738    9.975
 potential_pw2rs                    186 10.0    0.034    0.036    8.641    9.236
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.557    0.589    8.761    9.186
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.001    8.329    8.625
 local_gemm                         172  8.0    8.204    8.615    8.204    8.615
 calc_potential_gpw                 182  9.5    0.002    0.003    8.000    8.254
 mp2_ri_gpw_compute_en_comm          22  7.0    0.503    0.520    7.827    8.200
 collocate_single_gaussian           91 10.0    0.017    0.025    7.934    8.131
 mp2_ri_gpw_compute_en_ener         172  7.0    6.346    6.420    6.346    6.420
 mp_sendrecv_dm3                   2068  8.0    5.852    6.202    5.852    6.202
 mp_sync                             38 10.4    3.070    5.588    3.070    5.588
 pw_gather_s                        912 13.2    4.880    5.495    4.880    5.495
 pw_scatter_s                       910 13.7    3.948    4.412    3.948    4.412
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=101.696567, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1510.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             451.129344E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62385.
 MP_Allreduce        10249                    271.
 MP_Sync               580
 MP_Alltoall          2083                1117500.
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.015    0.028   58.666   58.667
 qs_mol_dyn_low                       1  2.0    0.003    0.006   57.125   57.133
 qs_forces                           11  3.9    0.002    0.003   57.054   57.056
 qs_energies                         11  4.9    0.002    0.006   55.601   55.613
 scf_env_do_scf                      11  5.9    0.000    0.001   44.346   44.346
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   41.444   41.444
 dbcsr_multiply_generic            2286 12.5    0.095    0.109   33.796   34.258
 qs_scf_new_mos                     108  7.5    0.000    0.001   31.613   31.925
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   31.613   31.925
 ot_scf_mini                        108  9.5    0.002    0.002   29.959   30.181
 multiply_cannon                   2286 13.5    0.185    0.194   26.536   28.247
 multiply_cannon_loop              2286 14.5    1.509    1.601   25.871   27.578
 velocity_verlet                     10  3.0    0.001    0.001   25.129   25.130
 ot_mini                            108 10.5    0.001    0.001   18.878   19.145
 qs_ot_get_derivative               108 11.5    0.001    0.001   15.913   16.129
 mp_waitall_1                    245248 16.5    8.406   15.109    8.406   15.109
 multiply_cannon_metrocomm3       54864 15.5    0.068    0.074    6.023   13.594
 multiply_cannon_multrec          54864 15.5    4.222    6.557    7.787   11.119
 init_scf_run                        11  5.9    0.000    0.001    9.927    9.927
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    9.927    9.927
 rebuild_ks_matrix                  119  8.3    0.000    0.000    8.101    8.281
 qs_ks_build_kohn_sham_matrix       119  9.3    0.010    0.011    8.101    8.281
 calculate_first_density_matrix       1  7.0    0.007    0.057    7.836    7.845
 qs_ot_get_p                        119 10.4    0.001    0.001    7.027    7.375
 qs_ks_update_qs_env                119  7.6    0.001    0.001    7.192    7.355
 multiply_cannon_sync_h2d         54864 15.5    5.893    7.335    5.893    7.335
 mp_sum_l                          7207 12.9    5.389    7.137    5.389    7.137
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    5.337    5.795
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    5.366    5.499
 parallel_gemm_fm                    81  9.0    0.000    0.000    5.081    5.085
 parallel_gemm_fm_cosma              81 10.0    5.081    5.085    5.081    5.085
 dbcsr_mm_accdrv_process          76910 16.1    1.188    1.851    3.487    4.782
 make_basis_sm                       11  9.8    0.039    0.065    4.717    4.719
 sum_up_and_integrate               119 10.3    0.012    0.014    4.457    4.487
 integrate_v_rspace                 119 11.3    0.002    0.002    4.445    4.476
 qs_rho_update_rho_low              119  7.7    0.000    0.000    3.990    4.109
 calculate_rho_elec                 119  8.7    0.011    0.016    3.989    4.109
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    3.756    3.839
 calculate_dm_sparse                119  9.5    0.000    0.000    3.172    3.320
 multiply_cannon_metrocomm1       54864 15.5    0.052    0.059    1.726    3.217
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.877    3.080
 apply_single                       119 13.6    0.000    0.000    2.877    3.080
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.962    2.964
 acc_transpose_blocks             54864 15.5    0.233    0.256    2.234    2.885
 init_scf_loop                       11  6.9    0.000    0.000    2.885    2.885
 cp_dbcsr_syevd                      50 12.0    0.002    0.003    2.882    2.883
 jit_kernel_multiply                 13 15.8    2.237    2.880    2.237    2.880
 rs_pw_transfer                     974 11.9    0.012    0.013    2.714    2.799
 mp_sum_d                          4125 12.0    1.707    2.790    1.707    2.790
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.693    2.745
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.686    2.687
 cp_fm_redistribute_end              50 14.0    2.447    2.666    2.452    2.668
 ot_diis_step                       108 11.5    0.006    0.006    2.665    2.666
 cp_fm_diag_elpa_base                50 14.0    0.214    2.579    0.215    2.591
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.380    2.434
 density_rs2pw                      119  9.7    0.004    0.004    2.105    2.204
 grid_integrate_task_list           119 12.3    2.030    2.131    2.030    2.131
 wfi_extrapolate                     11  7.9    0.001    0.001    2.030    2.030
 potential_pw2rs                    119 12.3    0.004    0.004    1.834    1.865
 acc_transpose_blocks_kernels     54864 16.5    0.253    0.386    1.237    1.735
 pw_transfer                       1439 11.6    0.051    0.056    1.609    1.698
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.001    1.676    1.694
 fft_wrap_pw1pw2                   1201 12.6    0.006    0.007    1.534    1.628
 make_m2s                          4572 13.5    0.054    0.056    1.562    1.603
 make_images                       4572 14.5    0.133    0.139    1.479    1.519
 mp_alltoall_d11v                  2130 13.8    1.252    1.442    1.252    1.442
 mp_waitany                       12084 13.8    1.230    1.419    1.230    1.419
 fft3d_ps                          1201 14.6    0.358    0.460    1.313    1.394
 grid_collocate_task_list           119  9.7    1.291    1.390    1.291    1.390
 jit_kernel_transpose                 5 15.5    0.984    1.357    0.984    1.357
 create_qs_kind_set                   1  2.0    0.001    0.002    1.343    1.355
 read_qs_kind                         2  3.0    0.012    0.026    1.342    1.355
 parser_read_line                  2807  4.0    0.001    0.001    1.330    1.346
 parser_read_line_low                 5  5.0    0.015    1.314    1.329    1.345
 broadcast_input_information          5  6.0    0.000    0.000    1.314    1.343
 mp_bcast_i                         105  3.0    1.306    1.338    1.306    1.338
 fft_wrap_pw1pw2_140                487 13.2    0.080    0.094    1.193    1.288
 arnoldi_extremal                   119 11.4    0.001    0.001    1.143    1.283
 arnoldi_normal_ev                  119 12.4    0.300    0.503    1.142    1.283
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=58.667000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=430.363636, yerr=0.979121
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             488.124416E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62664.
 MP_Allreduce        10226                    305.
 MP_Sync               104
 MP_Alltoall          2060                1066951.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.019    0.031   39.486   39.488
 qs_mol_dyn_low                       1  2.0    0.004    0.005   39.110   39.118
 qs_forces                           11  3.9    0.002    0.003   39.045   39.050
 qs_energies                         11  4.9    0.005    0.008   37.369   37.377
 scf_env_do_scf                      11  5.9    0.001    0.004   31.153   31.154
 scf_env_do_scf_inner_loop          108  6.5    0.040    0.063   28.676   28.678
 dbcsr_multiply_generic            2286 12.5    0.099    0.103   22.248   22.601
 qs_scf_new_mos                     108  7.5    0.001    0.001   19.784   20.027
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   19.784   20.027
 ot_scf_mini                        108  9.5    0.003    0.003   18.892   19.063
 multiply_cannon                   2286 13.5    0.209    0.217   17.334   18.880
 velocity_verlet                     10  3.0    0.001    0.002   17.926   17.927
 multiply_cannon_loop              2286 14.5    0.906    0.978   16.232   17.580
 ot_mini                            108 10.5    0.001    0.001   11.662   11.899
 mp_waitall_1                    200699 16.5    5.543   10.993    5.543   10.993
 multiply_cannon_multrec          27432 15.5    1.974    4.464    7.002    9.917
 multiply_cannon_metrocomm3       27432 15.5    0.068    0.071    4.186    9.594
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.205    9.380
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.086    7.225
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.015    7.085    7.225
 dbcsr_mm_accdrv_process          47894 16.0    3.872    6.088    4.958    6.943
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.254    6.380
 init_scf_run                        11  5.9    0.001    0.004    4.965    4.965
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    4.964    4.965
 qs_ot_get_p                        119 10.4    0.001    0.001    4.358    4.584
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.549    4.393
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.043    4.118
 apply_single                       119 13.6    0.000    0.000    3.043    4.118
 sum_up_and_integrate               119 10.3    0.024    0.027    4.099    4.108
 integrate_v_rspace                 119 11.3    0.002    0.003    4.075    4.085
 mp_sum_l                          7207 12.9    2.020    3.976    2.020    3.976
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.565    3.649
 calculate_rho_elec                 119  8.7    0.021    0.024    3.564    3.649
 calculate_first_density_matrix       1  7.0    0.001    0.002    3.461    3.468
 calculate_dm_sparse                119  9.5    0.000    0.001    3.137    3.215
 qs_ot_p2m_diag                      50 11.0    0.009    0.013    2.904    2.923
 multiply_cannon_sync_h2d         27432 15.5    2.195    2.853    2.195    2.853
 make_m2s                          4572 13.5    0.052    0.054    2.450    2.669
 make_images                       4572 14.5    0.201    0.239    2.361    2.580
 rs_pw_transfer                     974 11.9    0.010    0.011    2.473    2.554
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.471    2.471
 init_scf_loop                       11  6.9    0.002    0.004    2.454    2.457
 jit_kernel_multiply                  9 16.2    1.033    2.421    1.033    2.421
 ot_diis_step                       108 11.5    0.010    0.011    2.407    2.408
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.001    2.351    2.361
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.107    2.199
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.149    2.150
 cp_fm_redistribute_end              50 14.0    1.780    2.126    1.783    2.127
 cp_fm_diag_elpa_base                50 14.0    0.329    2.020    0.342    2.070
 density_rs2pw                      119  9.7    0.004    0.004    1.912    1.997
 grid_integrate_task_list           119 12.3    1.837    1.942    1.837    1.942
 potential_pw2rs                    119 12.3    0.006    0.006    1.848    1.859
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.855    1.858
 pw_transfer                       1439 11.6    0.063    0.066    1.749    1.779
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.659    1.698
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.008    1.660    1.692
 make_images_data                  4572 15.5    0.045    0.052    1.131    1.547
 prepare_preconditioner              11  7.9    0.000    0.000    1.502    1.528
 make_preconditioner                 11  8.9    0.001    0.001    1.502    1.528
 acc_transpose_blocks             27432 15.5    0.110    0.115    1.185    1.482
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.407    1.464
 hybrid_alltoall_any               4725 16.4    0.051    0.112    0.980    1.463
 wfi_extrapolate                     11  7.9    0.001    0.001    1.447    1.448
 fft3d_ps                          1201 14.6    0.499    0.553    1.367    1.396
 fft_wrap_pw1pw2_140                487 13.2    0.076    0.083    1.303    1.337
 grid_collocate_task_list           119  9.7    1.246    1.313    1.246    1.313
 mp_alltoall_d11v                  2130 13.8    1.181    1.303    1.181    1.303
 mp_allgather_i34                  2286 14.5    0.544    1.286    0.544    1.286
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.165    1.214
 mp_sum_d                          4125 12.0    0.603    1.024    0.603    1.024
 qs_energies_init_hamiltonians       11  5.9    0.001    0.003    0.984    0.990
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.888    0.902
 acc_transpose_blocks_kernels     27432 16.5    0.182    0.269    0.652    0.859
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=39.488000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=464.727273, yerr=1.710444
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             520.347648E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62660.
 MP_Allreduce        10225                    303.
 MP_Sync               104
 MP_Alltoall          1821                1607811.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_comm_split          50
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.023    0.051   39.597   39.599
 qs_mol_dyn_low                       1  2.0    0.003    0.005   37.969   37.976
 qs_forces                           11  3.9    0.002    0.003   37.910   37.912
 qs_energies                         11  4.9    0.002    0.010   36.356   36.360
 scf_env_do_scf                      11  5.9    0.001    0.003   25.916   25.917
 scf_env_do_scf_inner_loop          108  6.5    0.004    0.009   22.469   22.470
 dbcsr_multiply_generic            2286 12.5    0.093    0.098   17.244   17.313
 velocity_verlet                     10  3.0    0.001    0.001   14.958   14.959
 multiply_cannon                   2286 13.5    0.195    0.201   14.004   14.764
 qs_scf_new_mos                     108  7.5    0.001    0.001   14.630   14.656
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   14.630   14.655
 multiply_cannon_loop              2286 14.5    0.638    0.662   13.214   14.010
 ot_scf_mini                        108  9.5    0.002    0.003   13.915   13.928
 init_scf_run                        11  5.9    0.000    0.001    9.252    9.252
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    9.252    9.252
 ot_mini                            108 10.5    0.001    0.001    8.591    8.600
 calculate_first_density_matrix       1  7.0    0.014    0.111    7.947    7.948
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.109    7.123
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.059    7.087
 qs_ks_build_kohn_sham_matrix       119  9.3    0.041    0.155    7.058    7.087
 multiply_cannon_multrec          18288 15.5    1.945    2.832    6.856    7.058
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.336    6.354
 dbcsr_mm_accdrv_process          38222 16.0    4.027    5.473    4.826    5.753
 make_basis_sm                       11  9.8    0.115    0.237    4.824    4.826
 parallel_gemm_fm                    81  9.0    0.000    0.000    4.649    4.652
 parallel_gemm_fm_cosma              81 10.0    4.649    4.652    4.649    4.652
 sum_up_and_integrate               119 10.3    0.030    0.031    3.955    4.015
 integrate_v_rspace                 119 11.3    0.011    0.072    3.924    3.988
 init_scf_loop                       11  6.9    0.011    0.079    3.427    3.430
 mp_waitall_1                    158411 16.6    2.460    3.340    2.460    3.340
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.293    3.307
 calculate_rho_elec                 119  8.7    0.030    0.031    3.293    3.306
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.655    3.277
 qs_ot_get_p                        119 10.4    0.001    0.001    3.173    3.188
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.482    2.484
 rs_pw_transfer                     974 11.9    0.009    0.010    2.249    2.357
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.330    2.337
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.028    2.333
 apply_single                       119 13.6    0.000    0.000    2.028    2.332
 acc_transpose_blocks             18288 15.5    0.077    0.078    2.233    2.250
 calculate_dm_sparse                119  9.5    0.001    0.002    2.158    2.185
 qs_ot_p2m_diag                      50 11.0    0.012    0.012    2.124    2.129
 multiply_cannon_metrocomm3       18288 15.5    0.045    0.046    1.336    2.058
 make_m2s                          4572 13.5    0.044    0.045    1.767    1.922
 density_rs2pw                      119  9.7    0.004    0.004    1.825    1.921
 grid_integrate_task_list           119 12.3    1.800    1.895    1.800    1.895
 jit_kernel_multiply                 10 16.0    0.747    1.881    0.747    1.881
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.861    1.861
 make_images                       4572 14.5    0.192    0.202    1.682    1.835
 pw_transfer                       1439 11.6    0.063    0.066    1.742    1.810
 acc_transpose_blocks_kernels     18288 16.5    0.210    0.217    1.792    1.805
 potential_pw2rs                    119 12.3    0.007    0.008    1.724    1.791
 prepare_preconditioner              11  7.9    0.000    0.000    1.767    1.777
 make_preconditioner                 11  8.9    0.006    0.036    1.767    1.777
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.652    1.721
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.619    1.712
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.629    1.630
 cp_fm_redistribute_end              50 14.0    1.216    1.610    1.217    1.611
 jit_kernel_transpose                 5 15.6    1.583    1.588    1.583    1.588
 cp_fm_diag_elpa_base                50 14.0    0.378    1.527    0.392    1.575
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.001    1.535    1.551
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.532    1.540
 multiply_cannon_sync_h2d         18288 15.5    1.420    1.536    1.420    1.536
 ot_diis_step                       108 11.5    0.011    0.011    1.464    1.464
 create_qs_kind_set                   1  2.0    0.001    0.001    1.421    1.437
 read_qs_kind                         2  3.0    0.012    0.020    1.420    1.437
 parser_read_line                  2807  4.0    0.001    0.001    1.408    1.429
 parser_read_line_low                 5  5.0    0.045    1.403    1.408    1.428
 broadcast_input_information          5  6.0    0.000    0.000    1.363    1.427
 mp_sum_l                          7207 12.9    1.089    1.422    1.089    1.422
 mp_bcast_i                         105  3.0    1.357    1.421    1.357    1.421
 fft3d_ps                          1201 14.6    0.506    0.522    1.338    1.411
 fft_wrap_pw1pw2_140                487 13.2    0.087    0.090    1.320    1.390
 grid_collocate_task_list           119  9.7    1.216    1.310    1.216    1.310
 wfi_extrapolate                     11  7.9    0.001    0.001    1.154    1.154
 mp_sum_d                          4125 12.0    0.820    1.094    0.820    1.094
 qs_vxc_create                      119 10.3    0.033    0.118    0.916    0.994
 xc_vxc_pw_create                   119 11.3    0.109    0.232    0.883    0.990
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    0.958    0.960
 make_images_data                  4572 15.5    0.045    0.049    0.786    0.952
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.868    0.891
 hybrid_alltoall_any               4725 16.4    0.055    0.111    0.674    0.869
 mp_alltoall_d11v                  2130 13.8    0.735    0.862    0.735    0.862
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.801    0.803
 mp_alltoall_z22v                  1201 16.6    0.675    0.792    0.675    0.792
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=39.599000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=495.727273, yerr=1.482682
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             557.592576E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62659.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_comm_split          50
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.052    0.201   38.498   38.502
 qs_mol_dyn_low                       1  2.0    0.003    0.004   37.982   37.989
 qs_forces                           11  3.9    0.002    0.003   37.925   37.925
 qs_energies                         11  4.9    0.001    0.004   36.229   36.234
 scf_env_do_scf                      11  5.9    0.001    0.005   29.193   29.194
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.012   23.932   23.933
 dbcsr_multiply_generic            2286 12.5    0.099    0.101   20.027   20.151
 velocity_verlet                     10  3.0    0.001    0.001   17.361   17.362
 multiply_cannon                   2286 13.5    0.233    0.267   16.406   16.873
 qs_scf_new_mos                     108  7.5    0.001    0.001   15.951   16.004
 qs_scf_loop_do_ot                  108  8.5    0.001    0.005   15.951   16.003
 multiply_cannon_loop              2286 14.5    0.944    0.975   15.470   15.834
 ot_scf_mini                        108  9.5    0.004    0.018   15.025   15.073
 multiply_cannon_multrec          27432 15.5    2.335    2.988    9.385    9.719
 ot_mini                            108 10.5    0.001    0.001    9.239    9.307
 rebuild_ks_matrix                  119  8.3    0.000    0.000    8.215    8.277
 qs_ks_build_kohn_sham_matrix       119  9.3    0.048    0.304    8.215    8.276
 dbcsr_mm_accdrv_process          47916 15.9    6.070    7.975    6.955    8.201
 qs_ks_update_qs_env                119  7.6    0.001    0.001    7.501    7.556
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.437    7.488
 init_scf_run                        11  5.9    0.000    0.001    5.656    5.657
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.656    5.657
 init_scf_loop                       11  6.9    0.001    0.005    5.238    5.238
 calculate_first_density_matrix       1  7.0    0.000    0.002    4.312    4.312
 sum_up_and_integrate               119 10.3    0.035    0.037    3.677    3.686
 integrate_v_rspace                 119 11.3    0.002    0.003    3.642    3.652
 qs_ot_get_p                        119 10.4    0.001    0.001    3.262    3.333
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.204    3.236
 calculate_rho_elec                 119  8.7    0.040    0.046    3.203    3.236
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.673    3.162
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.071    3.073
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.895    2.912
 calculate_dm_sparse                119  9.5    0.000    0.001    2.815    2.871
 acc_transpose_blocks             27432 15.5    0.114    0.120    2.688    2.723
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.118    2.573
 apply_single                       119 13.6    0.000    0.000    2.118    2.573
 prepare_preconditioner              11  7.9    0.000    0.000    2.500    2.507
 make_preconditioner                 11  8.9    0.000    0.002    2.500    2.507
 mp_sum_d                          4123 12.0    2.013    2.478    2.013    2.478
 jit_kernel_multiply                  9 16.2    0.824    2.434    0.824    2.434
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.117    2.431
 make_m2s                          4572 13.5    0.054    0.056    2.132    2.241
 mp_waitall_1                    137007 16.6    1.739    2.229    1.739    2.229
 make_images                       4572 14.5    0.273    0.332    2.025    2.131
 acc_transpose_blocks_kernels     27432 16.5    0.267    0.274    2.059    2.086
 rs_pw_transfer                     974 11.9    0.009    0.009    1.911    2.048
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    2.007    2.018
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    1.970    1.979
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.901    1.928
 grid_integrate_task_list           119 12.3    1.824    1.904    1.824    1.904
 qs_vxc_create                      119 10.3    0.145    1.148    1.858    1.901
 xc_vxc_pw_create                   119 11.3    0.044    0.071    1.713    1.900
 jit_kernel_transpose                 5 15.6    1.792    1.813    1.792    1.813
 density_rs2pw                      119  9.7    0.004    0.004    1.690    1.804
 mp_sum_l                          7207 12.9    1.220    1.774    1.220    1.774
 ot_diis_step                       108 11.5    0.012    0.012    1.761    1.761
 pw_transfer                       1439 11.6    0.063    0.066    1.729    1.759
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.639    1.672
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.656    1.656
 potential_pw2rs                    119 12.3    0.008    0.009    1.472    1.478
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.430    1.431
 cp_fm_redistribute_end              50 14.0    0.948    1.409    0.949    1.409
 cp_fm_diag_elpa_base                50 14.0    0.438    1.345    0.457    1.383
 fft_wrap_pw1pw2_140                487 13.2    0.085    0.094    1.347    1.382
 fft3d_ps                          1201 14.6    0.534    0.582    1.320    1.343
 grid_collocate_task_list           119  9.7    1.221    1.316    1.221    1.316
 wfi_extrapolate                     11  7.9    0.001    0.001    1.299    1.299
 multiply_cannon_metrocomm3       27432 15.5    0.038    0.040    0.772    1.192
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    1.125    1.125
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.087    1.107
 cp_fm_upper_to_full                 72 13.5    0.789    1.088    0.789    1.088
 dbcsr_complete_redistribute        329 12.2    0.119    0.142    0.778    1.043
 multiply_cannon_sync_h2d         27432 15.5    0.985    1.040    0.985    1.040
 mp_alltoall_d11v                  2130 13.8    0.732    0.938    0.732    0.938
 make_images_data                  4572 15.5    0.046    0.049    0.818    0.928
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.795    0.874
 hybrid_alltoall_any               4725 16.4    0.062    0.151    0.708    0.863
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.557    0.816
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.805    0.810
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=38.502000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=529.545455, yerr=3.551277
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             608.501760E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_comm_split          50
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.043    0.060   28.829   28.829
 qs_mol_dyn_low                       1  2.0    0.003    0.003   28.598   28.605
 qs_forces                           11  3.9    0.001    0.002   28.540   28.541
 qs_energies                         11  4.9    0.001    0.001   26.841   26.844
 scf_env_do_scf                      11  5.9    0.000    0.001   20.933   20.933
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   18.455   18.456
 velocity_verlet                     10  3.0    0.001    0.001   14.077   14.079
 dbcsr_multiply_generic            2286 12.5    0.092    0.096   13.366   13.443
 multiply_cannon                   2286 13.5    0.231    0.239   10.848   11.212
 qs_scf_new_mos                     108  7.5    0.001    0.001   10.960   10.992
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   10.960   10.991
 ot_scf_mini                        108  9.5    0.002    0.002   10.292   10.322
 multiply_cannon_loop              2286 14.5    0.331    0.342    9.948   10.153
 multiply_cannon_multrec           9144 15.5    1.660    1.913    7.030    7.273
 ot_mini                            108 10.5    0.001    0.001    5.752    5.785
 rebuild_ks_matrix                  119  8.3    0.000    0.000    5.752    5.773
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    5.751    5.772
 dbcsr_mm_accdrv_process          12550 15.8    4.152    5.202    5.269    5.352
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.121    5.141
 init_scf_run                        11  5.9    0.000    0.001    4.485    4.485
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    4.485    4.485
 qs_ot_get_derivative               108 11.5    0.001    0.001    4.426    4.455
 sum_up_and_integrate               119 10.3    0.037    0.041    3.517    3.522
 integrate_v_rspace                 119 11.3    0.002    0.003    3.479    3.485
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.378    3.378
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.178    3.186
 calculate_rho_elec                 119  8.7    0.059    0.061    3.178    3.186
 qs_ot_get_p                        119 10.4    0.001    0.001    2.753    2.802
 calculate_dm_sparse                119  9.5    0.000    0.000    2.783    2.800
 init_scf_loop                       11  6.9    0.000    0.000    2.455    2.456
 jit_kernel_multiply                  7 15.7    1.078    2.361    1.078    2.361
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    2.207    2.212
 mp_waitall_1                    115863 16.7    1.426    1.943    1.426    1.943
 grid_integrate_task_list           119 12.3    1.856    1.930    1.856    1.930
 make_m2s                          4572 13.5    0.034    0.035    1.726    1.879
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    1.843    1.845
 make_images                       4572 14.5    0.269    0.302    1.637    1.787
 prepare_preconditioner              11  7.9    0.000    0.000    1.722    1.727
 make_preconditioner                 11  8.9    0.000    0.000    1.722    1.727
 pw_transfer                       1439 11.6    0.063    0.065    1.687    1.694
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.680    1.681
 rs_pw_transfer                     974 11.9    0.008    0.008    1.603    1.674
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.644    1.644
 density_rs2pw                      119  9.7    0.003    0.004    1.580    1.631
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.602    1.626
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.596    1.604
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.556    1.571
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.525    1.531
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.364    1.365
 cp_fm_redistribute_end              50 14.0    0.681    1.345    0.682    1.345
 cp_fm_diag_elpa_base                50 14.0    0.618    1.275    0.662    1.329
 grid_collocate_task_list           119  9.7    1.271    1.319    1.271    1.319
 ot_diis_step                       108 11.5    0.012    0.013    1.314    1.315
 fft_wrap_pw1pw2_140                487 13.2    0.082    0.086    1.298    1.306
 potential_pw2rs                    119 12.3    0.010    0.010    1.298    1.302
 fft3d_ps                          1201 14.6    0.539    0.550    1.264    1.272
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    1.214    1.214
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.167    1.187
 apply_single                       119 13.6    0.000    0.000    1.167    1.186
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.112    1.126
 wfi_extrapolate                     11  7.9    0.001    0.001    1.064    1.064
 hybrid_alltoall_any               4725 16.4    0.063    0.176    0.790    1.017
 acc_transpose_blocks              9144 15.5    0.039    0.040    0.985    0.991
 make_images_data                  4572 15.5    0.040    0.043    0.808    0.982
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.870    0.930
 mp_alltoall_d11v                  2130 13.8    0.823    0.910    0.823    0.910
 cp_fm_cholesky_invert               11 10.9    0.868    0.871    0.868    0.871
 multiply_cannon_sync_h2d          9144 15.5    0.715    0.788    0.715    0.788
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.752    0.755
 acc_transpose_blocks_kernels      9144 16.5    0.117    0.120    0.744    0.747
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.722    0.731
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    0.667    0.718
 multiply_cannon_metrocomm3        9144 15.5    0.019    0.019    0.347    0.665
 mp_allgather_i34                  2286 14.5    0.235    0.654    0.235    0.654
 mp_alltoall_z22v                  1201 16.6    0.599    0.633    0.599    0.633
 jit_kernel_transpose                 5 15.6    0.627    0.629    0.627    0.629
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=28.829000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=573.636364, yerr=8.967342
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             766.980096E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63729.
 MP_Allreduce        10074                    433.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.026    0.047   43.668   43.669
 qs_mol_dyn_low                       1  2.0    0.003    0.005   43.227   43.234
 qs_forces                           11  3.9    0.003    0.004   43.167   43.169
 qs_energies                         11  4.9    0.001    0.001   41.213   41.217
 scf_env_do_scf                      11  5.9    0.001    0.001   34.193   34.193
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.006   26.485   26.486
 velocity_verlet                     10  3.0    0.001    0.001   22.702   22.708
 dbcsr_multiply_generic            2286 12.5    0.100    0.102   20.121   20.356
 qs_scf_new_mos                     108  7.5    0.001    0.001   17.701   17.796
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   17.700   17.795
 multiply_cannon                   2286 13.5    0.303    0.308   16.273   17.230
 ot_scf_mini                        108  9.5    0.002    0.002   16.623   16.721
 multiply_cannon_loop              2286 14.5    0.344    0.351   15.002   15.987
 ot_mini                            108 10.5    0.001    0.001   10.739   10.853
 multiply_cannon_multrec           9144 15.5    3.336    4.684    9.515   10.577
 qs_ot_get_derivative               108 11.5    0.001    0.001    8.696    8.796
 dbcsr_mm_accdrv_process          12550 15.8    4.680    7.410    6.054    8.328
 init_scf_loop                       11  6.9    0.000    0.000    7.680    7.681
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.779    6.917
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    6.778    6.917
 prepare_preconditioner              11  7.9    0.000    0.000    6.769    6.782
 make_preconditioner                 11  8.9    0.000    0.000    6.769    6.782
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.336    6.656
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.127    6.253
 mp_waitall_1                     94719 16.7    3.317    5.338    3.317    5.338
 init_scf_run                        11  5.9    0.000    0.001    4.873    4.873
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    4.872    4.873
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    4.440    4.855
 cp_fm_upper_to_full                 72 14.2    3.140    4.512    3.140    4.512
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.020    2.274    4.202
 jit_kernel_multiply                 10 15.4    1.347    3.771    1.347    3.771
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.615    3.638
 calculate_rho_elec                 119  8.7    0.118    0.121    3.614    3.637
 sum_up_and_integrate               119 10.3    0.064    0.065    3.627    3.633
 integrate_v_rspace                 119 11.3    0.003    0.003    3.562    3.569
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.468    3.473
 qs_ot_get_p                        119 10.4    0.001    0.001    3.152    3.285
 calculate_dm_sparse                119  9.5    0.000    0.000    2.925    2.946
 dbcsr_complete_redistribute        329 12.2    0.289    0.296    1.980    2.765
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.196    2.479
 apply_single                       119 13.6    0.000    0.000    2.195    2.479
 make_m2s                          4572 13.5    0.038    0.038    2.288    2.472
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.663    2.448
 make_images                       4572 14.5    0.353    0.383    2.167    2.351
 mp_alltoall_i22                    627 13.8    1.399    2.226    1.399    2.226
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.428    2.206
 grid_integrate_task_list           119 12.3    2.000    2.025    2.000    2.025
 ot_diis_step                       108 11.5    0.014    0.014    2.017    2.017
 pw_transfer                       1439 11.6    0.066    0.067    2.004    2.007
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.994    2.005
 qs_ot_p2m_diag                      50 11.0    0.043    0.044    1.912    1.913
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.908    1.911
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.878    1.878
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.866    1.866
 density_rs2pw                      119  9.7    0.003    0.003    1.728    1.750
 mp_sum_l                          7207 12.9    1.034    1.740    1.034    1.740
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.681    1.731
 acc_transpose_blocks              9144 15.5    0.038    0.039    1.643    1.670
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.621    1.665
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.639    1.639
 fft_wrap_pw1pw2_140                487 13.2    0.089    0.089    1.602    1.605
 fft3d_ps                          1201 14.6    0.567    0.576    1.540    1.545
 grid_collocate_task_list           119  9.7    1.444    1.464    1.444    1.464
 cp_fm_cholesky_invert               11 10.9    1.439    1.442    1.439    1.442
 acc_transpose_blocks_kernels      9144 16.5    0.117    0.119    1.394    1.417
 rs_pw_transfer                     974 11.9    0.009    0.009    1.364    1.406
 hybrid_alltoall_any               4725 16.4    0.087    0.149    1.120    1.380
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.348    1.348
 cp_fm_diag_elpa_base                50 14.0    1.207    1.257    1.346    1.346
 wfi_extrapolate                     11  7.9    0.001    0.001    1.339    1.339
 make_images_data                  4572 15.5    0.043    0.046    1.073    1.316
 jit_kernel_transpose                 5 15.6    1.276    1.299    1.276    1.299
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.165    1.188
 mp_alltoall_d11v                  2130 13.8    1.159    1.187    1.159    1.187
 potential_pw2rs                    119 12.3    0.014    0.015    1.171    1.172
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.136    1.156
 multiply_cannon_sync_h2d          9144 15.5    1.039    1.044    1.039    1.044
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.989    1.040
 qs_create_task_list                 11  7.9    0.007    0.007    0.950    0.961
 generate_qs_task_list               11  8.9    0.372    0.393    0.943    0.955
 make_basis_sm                       11  9.8    0.364    0.368    0.879    0.879
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.863    0.876
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=43.669000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=712.363636, yerr=18.227216
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             503.537664E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65372.
 MP_Allreduce         9840                    486.
 MP_Sync               100
 MP_Alltoall          1938                1452934.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_comm_split          48
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.074    0.166   84.438   84.439
 qs_mol_dyn_low                       1  2.0    0.004    0.006   83.872   83.881
 qs_forces                           11  3.9    0.002    0.004   83.800   83.801
 qs_energies                         11  4.9    0.002    0.007   80.934   80.954
 scf_env_do_scf                      11  5.9    0.001    0.002   71.131   71.133
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.008   65.482   65.483
 dbcsr_multiply_generic            2055 12.4    0.106    0.111   52.877   53.122
 qs_scf_new_mos                      99  7.5    0.000    0.001   48.546   48.658
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   48.545   48.657
 ot_scf_mini                         99  9.5    0.002    0.003   46.149   46.238
 multiply_cannon                   2055 13.4    0.179    0.188   43.038   43.876
 multiply_cannon_loop              2055 14.4    1.554    1.596   42.084   42.965
 velocity_verlet                     10  3.0    0.001    0.002   42.308   42.309
 ot_mini                             99 10.5    0.001    0.001   27.825   27.927
 qs_ot_get_derivative                99 11.5    0.002    0.019   21.014   21.098
 multiply_cannon_multrec          49320 15.4   12.084   12.870   17.454   18.246
 rebuild_ks_matrix                  110  8.3    0.000    0.001   14.477   14.631
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.013   14.477   14.630
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.708   12.842
 mp_waitall_1                    220248 16.4   11.699   12.680   11.699   12.680
 multiply_cannon_sync_h2d         49320 15.4    9.911   10.683    9.911   10.683
 qs_ot_get_p                        110 10.4    0.001    0.001    9.696    9.807
 multiply_cannon_metrocomm3       49320 15.4    0.079    0.085    7.044    8.397
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    7.166    7.719
 init_scf_run                        11  5.9    0.000    0.001    7.672    7.673
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    7.672    7.672
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.138    7.530
 apply_single                       110 13.6    0.000    0.001    7.138    7.530
 sum_up_and_integrate               110 10.3    0.036    0.043    7.082    7.094
 integrate_v_rspace                 110 11.3    0.002    0.003    7.045    7.066
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    6.797    6.851
 ot_diis_step                        99 11.5    0.005    0.006    6.520    6.520
 qs_ot_p2m_diag                      48 11.0    0.012    0.019    6.421    6.452
 qs_rho_update_rho_low              110  7.6    0.000    0.001    6.213    6.338
 calculate_rho_elec                 110  8.6    0.020    0.024    6.212    6.337
 mp_sum_l                          6514 12.8    5.435    6.178    5.435    6.178
 init_scf_loop                       11  6.9    0.005    0.022    5.619    5.621
 dbcsr_mm_accdrv_process          87628 16.1    2.086    2.181    5.249    5.539
 cp_dbcsr_syevd                      48 12.0    0.002    0.003    5.400    5.401
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    4.927    4.928
 cp_fm_redistribute_end              48 14.0    4.300    4.901    4.303    4.902
 cp_fm_diag_elpa_base                48 14.0    0.593    4.756    0.596    4.783
 calculate_dm_sparse                110  9.5    0.001    0.001    4.088    4.183
 make_m2s                          4110 13.4    0.061    0.066    3.980    4.103
 rs_pw_transfer                     902 11.9    0.012    0.013    3.875    4.079
 wfi_extrapolate                     11  7.9    0.001    0.001    4.016    4.016
 make_images                       4110 14.4    0.178    0.192    3.885    4.011
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.814    3.819
 multiply_cannon_metrocomm1       49320 15.4    0.060    0.064    2.661    3.791
 calculate_first_density_matrix       1  7.0    0.001    0.003    3.565    3.573
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.484    3.521
 density_rs2pw                      110  9.6    0.004    0.004    3.317    3.481
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    3.403    3.475
 prepare_preconditioner              11  7.9    0.000    0.000    3.409    3.438
 make_preconditioner                 11  8.9    0.000    0.001    3.409    3.438
 grid_integrate_task_list           110 12.3    3.272    3.427    3.272    3.427
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.179    3.215
 pw_transfer                       1331 11.6    0.054    0.066    2.876    2.946
 jit_kernel_multiply                 13 15.9    2.876    2.909    2.876    2.909
 fft_wrap_pw1pw2                   1111 12.6    0.007    0.008    2.789    2.862
 potential_pw2rs                    110 12.3    0.006    0.007    2.669    2.696
 mp_alltoall_d11v                  2046 13.8    2.077    2.538    2.077    2.538
 acc_transpose_blocks             49320 15.4    0.226    0.235    2.386    2.471
 fft_wrap_pw1pw2_140                451 13.1    0.168    0.188    2.332    2.402
 fft3d_ps                          1111 14.6    0.752    0.840    2.328    2.388
 grid_collocate_task_list           110  9.6    2.088    2.201    2.088    2.201
 mp_waitany                       14300 13.8    1.850    2.154    1.850    2.154
 mp_sum_d                          3879 11.9    1.504    2.036    1.504    2.036
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.978    2.003
 make_images_data                  4110 15.4    0.043    0.046    1.814    1.967
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.910    1.933
 hybrid_alltoall_any               4261 16.3    0.082    0.481    1.566    1.864
 cp_fm_cholesky_invert               11 10.9    1.823    1.826    1.823    1.826
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=84.439000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=477.545455, yerr=3.143930
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             586.813440E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65587.
 MP_Allreduce         9839                    562.
 MP_Sync               100
 MP_Alltoall          1717                3507907.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_comm_split          48
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.099    0.162   71.076   71.077
 qs_mol_dyn_low                       1  2.0    0.003    0.004   70.263   70.274
 qs_forces                           11  3.9    0.002    0.003   70.185   70.187
 qs_energies                         11  4.9    0.004    0.007   66.889   66.893
 scf_env_do_scf                      11  5.9    0.001    0.001   56.866   56.869
 scf_env_do_scf_inner_loop           99  6.5    0.019    0.037   49.114   49.116
 dbcsr_multiply_generic            2055 12.4    0.112    0.119   39.509   39.687
 velocity_verlet                     10  3.0    0.001    0.001   36.105   36.107
 multiply_cannon                   2055 13.4    0.225    0.256   32.909   33.977
 qs_scf_new_mos                      99  7.5    0.001    0.001   33.332   33.481
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   33.331   33.481
 multiply_cannon_loop              2055 14.4    0.929    0.951   31.559   32.362
 ot_scf_mini                         99  9.5    0.003    0.004   31.673   31.811
 ot_mini                             99 10.5    0.001    0.001   18.909   19.052
 multiply_cannon_multrec          24660 15.4    7.599    9.608   14.332   16.186
 rebuild_ks_matrix                  110  8.3    0.000    0.001   13.565   13.661
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.015   13.564   13.660
 qs_ot_get_derivative                99 11.5    0.001    0.001   13.083   13.225
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.965   12.047
 mp_waitall_1                    176588 16.5    7.971   10.554    7.971   10.554
 multiply_cannon_metrocomm3       24660 15.4    0.070    0.073    5.489    8.347
 multiply_cannon_sync_h2d         24660 15.4    6.981    8.137    6.981    8.137
 init_scf_loop                       11  6.9    0.001    0.004    7.709    7.712
 init_scf_run                        11  5.9    0.000    0.001    7.586    7.587
 scf_env_initial_rho_setup           11  6.9    0.009    0.047    7.586    7.587
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.514    7.213
 apply_single                       110 13.6    0.000    0.001    6.514    7.213
 dbcsr_mm_accdrv_process          52282 16.1    5.058    6.003    6.573    7.121
 sum_up_and_integrate               110 10.3    0.053    0.059    6.503    6.515
 integrate_v_rspace                 110 11.3    0.002    0.002    6.450    6.461
 qs_ot_get_p                        110 10.4    0.001    0.002    6.174    6.317
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    5.109    5.833
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.751    5.777
 ot_diis_step                        99 11.5    0.010    0.010    5.777    5.777
 calculate_rho_elec                 110  8.6    0.039    0.047    5.751    5.777
 prepare_preconditioner              11  7.9    0.000    0.000    5.650    5.669
 make_preconditioner                 11  8.9    0.000    0.001    5.650    5.669
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.248    5.406
 make_m2s                          4110 13.4    0.057    0.062    4.186    4.690
 make_images                       4110 14.4    0.401    0.446    4.077    4.579
 qs_ot_p2m_diag                      48 11.0    0.029    0.044    4.250    4.270
 calculate_dm_sparse                110  9.5    0.001    0.001    4.241    4.270
 calculate_first_density_matrix       1  7.0    0.011    0.028    3.960    3.970
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.814    3.814
 wfi_extrapolate                     11  7.9    0.001    0.001    3.529    3.529
 pw_transfer                       1331 11.6    0.065    0.069    3.312    3.455
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.206    3.348
 grid_integrate_task_list           110 12.3    3.161    3.309    3.161    3.309
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.292    3.294
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.267    3.268
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.183    3.259
 cp_fm_redistribute_end              48 14.0    2.444    3.242    2.445    3.242
 density_rs2pw                      110  9.6    0.004    0.004    3.053    3.221
 cp_fm_diag_elpa_base                48 14.0    0.763    3.092    0.794    3.186
 rs_pw_transfer                     902 11.9    0.012    0.013    2.941    3.096
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.938    2.984
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.002    2.815    2.825
 make_images_data                  4110 15.4    0.047    0.051    2.319    2.821
 hybrid_alltoall_any               4261 16.3    0.102    0.436    2.053    2.817
 fft_wrap_pw1pw2_140                451 13.1    0.200    0.218    2.663    2.800
 fft3d_ps                          1111 14.6    1.074    1.270    2.563    2.690
 cp_fm_cholesky_invert               11 10.9    2.622    2.629    2.622    2.629
 acc_transpose_blocks             24660 15.4    0.111    0.114    2.566    2.594
 jit_kernel_multiply                 11 16.3    1.157    2.561    1.157    2.561
 mp_sum_l                          6514 12.8    1.821    2.416    1.821    2.416
 potential_pw2rs                    110 12.3    0.008    0.009    2.391    2.405
 grid_collocate_task_list           110  9.6    2.122    2.247    2.122    2.247
 mp_alltoall_d11v                  2046 13.8    1.767    2.036    1.767    2.036
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.950    1.971
 acc_transpose_blocks_kernels     24660 16.4    0.309    0.317    1.948    1.969
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.912    1.913
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.765    1.776
 mp_allgather_i34                  2055 14.4    0.677    1.656    0.677    1.656
 jit_kernel_transpose                 5 15.6    1.639    1.653    1.639    1.653
 multiply_cannon_metrocomm4       22605 15.4    0.073    0.076    0.799    1.609
 mp_irecv_dv                      57340 16.2    0.675    1.492    0.675    1.492
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.374    1.487
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=71.077000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=555.272727, yerr=6.579457
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             660.664320E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65578.
 MP_Allreduce         9838                    559.
 MP_Sync               100
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_comm_split          48
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.039    0.073   60.581   60.582
 qs_mol_dyn_low                       1  2.0    0.003    0.004   60.216   60.224
 qs_forces                           11  3.9    0.002    0.003   60.151   60.152
 qs_energies                         11  4.9    0.011    0.033   56.977   56.981
 scf_env_do_scf                      11  5.9    0.001    0.001   47.508   47.509
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   39.069   39.070
 velocity_verlet                     10  3.0    0.001    0.001   32.034   32.036
 dbcsr_multiply_generic            2055 12.4    0.106    0.108   28.598   28.821
 qs_scf_new_mos                      99  7.5    0.001    0.001   24.734   24.855
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   24.734   24.855
 ot_scf_mini                         99  9.5    0.003    0.003   23.522   23.654
 multiply_cannon                   2055 13.4    0.213    0.222   22.445   23.607
 multiply_cannon_loop              2055 14.4    0.618    0.634   21.296   22.397
 ot_mini                             99 10.5    0.001    0.001   13.387   13.515
 rebuild_ks_matrix                  110  8.3    0.000    0.001   12.061   12.188
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.015   12.061   12.187
 multiply_cannon_multrec          16440 15.4    3.954    4.976    9.934   11.152
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.641   10.754
 mp_waitall_1                    139946 16.5    6.910   10.053    6.910   10.053
 qs_ot_get_derivative                99 11.5    0.001    0.001    8.969    9.096
 init_scf_loop                       11  6.9    0.001    0.003    8.403    8.404
 multiply_cannon_metrocomm3       16440 15.4    0.044    0.045    4.228    7.043
 prepare_preconditioner              11  7.9    0.000    0.000    6.660    6.678
 make_preconditioner                 11  8.9    0.000    0.001    6.660    6.678
 sum_up_and_integrate               110 10.3    0.061    0.062    6.362    6.377
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.022    6.366
 init_scf_run                        11  5.9    0.000    0.001    6.317    6.319
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    6.317    6.319
 integrate_v_rspace                 110 11.3    0.002    0.003    6.301    6.317
 dbcsr_mm_accdrv_process          34862 16.1    4.747    5.532    5.833    6.136
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.760    5.772
 calculate_rho_elec                 110  8.6    0.058    0.058    5.760    5.771
 qs_ot_get_p                        110 10.4    0.001    0.001    5.380    5.518
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.936    5.341
 apply_single                       110 13.6    0.000    0.000    4.936    5.341
 make_m2s                          4110 13.4    0.050    0.052    4.084    4.427
 ot_diis_step                        99 11.5    0.010    0.011    4.390    4.390
 multiply_cannon_sync_h2d         16440 15.4    3.702    4.346    3.702    4.346
 make_images                       4110 14.4    0.394    0.511    3.969    4.309
 qs_ot_p2m_diag                      48 11.0    0.042    0.044    3.808    3.811
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.072    3.705
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.473    3.474
 grid_integrate_task_list           110 12.3    3.186    3.408    3.186    3.408
 pw_transfer                       1331 11.6    0.064    0.071    3.158    3.164
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.052    3.060
 calculate_dm_sparse                110  9.5    0.001    0.001    2.983    3.011
 calculate_first_density_matrix       1  7.0    0.001    0.003    2.996    2.999
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.960    2.962
 wfi_extrapolate                     11  7.9    0.001    0.001    2.947    2.947
 cp_fm_redistribute_end              48 14.0    1.846    2.932    1.848    2.933
 density_rs2pw                      110  9.6    0.004    0.004    2.758    2.917
 cp_fm_diag_elpa_base                48 14.0    1.022    2.783    1.080    2.889
 rs_pw_transfer                     902 11.9    0.011    0.011    2.637    2.832
 hybrid_alltoall_any               4261 16.3    0.105    0.373    2.083    2.807
 make_images_data                  4110 15.4    0.044    0.048    2.342    2.797
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.793    2.795
 cp_fm_cholesky_invert               11 10.9    2.649    2.656    2.649    2.656
 fft_wrap_pw1pw2_140                451 13.1    0.210    0.214    2.606    2.615
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.466    2.512
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.317    2.393
 fft3d_ps                          1111 14.6    1.063    1.076    2.368    2.379
 grid_collocate_task_list           110  9.6    2.176    2.370    2.176    2.370
 mp_alltoall_d11v                  2046 13.8    2.029    2.356    2.029    2.356
 multiply_cannon_metrocomm4       14385 15.4    0.045    0.049    0.879    2.289
 potential_pw2rs                    110 12.3    0.010    0.010    2.198    2.207
 mp_irecv_dv                      48980 15.7    0.808    2.168    0.808    2.168
 mp_sum_l                          6514 12.8    1.459    2.032    1.459    2.032
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    2.018    2.019
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.956    1.969
 jit_kernel_multiply                  8 16.6    0.689    1.887    0.689    1.887
 dbcsr_complete_redistribute        325 12.2    0.317    0.352    1.374    1.834
 cp_fm_upper_to_full                 70 13.6    1.378    1.832    1.378    1.832
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.620    1.634
 cp_fm_cholesky_decompose            22 10.9    1.544    1.563    1.544    1.563
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.370    1.506
 mp_allgather_i34                  2055 14.4    0.473    1.475    0.473    1.475
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    0.936    1.378
 acc_transpose_blocks             16440 15.4    0.072    0.074    1.341    1.362
 mp_waitany                       17072 13.8    1.147    1.332    1.147    1.332
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.237    1.249
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=60.582000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=626.545455, yerr=8.814901
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             724.381696E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65576.
 MP_Allreduce         9838                    600.
 MP_Sync               100
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_comm_split          48
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.048    0.134   68.268   68.270
 qs_mol_dyn_low                       1  2.0    0.005    0.038   67.439   67.449
 qs_forces                           11  3.9    0.029    0.037   66.690   66.692
 qs_energies                         11  4.9    0.005    0.033   63.296   63.325
 scf_env_do_scf                      11  5.9    0.001    0.001   53.740   53.743
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   42.090   42.090
 velocity_verlet                     10  3.0    0.007    0.053   37.313   37.341
 dbcsr_multiply_generic            2055 12.4    0.114    0.117   31.137   31.433
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.820   27.924
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   27.819   27.923
 ot_scf_mini                         99  9.5    0.003    0.003   26.202   26.305
 multiply_cannon                   2055 13.4    0.244    0.264   24.082   25.452
 multiply_cannon_loop              2055 14.4    0.889    0.910   22.723   23.416
 ot_mini                             99 10.5    0.001    0.001   15.661   15.791
 multiply_cannon_multrec          24660 15.4    4.223    6.783   13.599   15.279
 rebuild_ks_matrix                  110  8.3    0.000    0.000   11.882   12.007
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.015   11.882   12.007
 qs_ot_get_derivative                99 11.5    0.001    0.001   11.502   11.612
 init_scf_loop                       11  6.9    0.004    0.017   11.610   11.611
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.531   10.646
 dbcsr_mm_accdrv_process          52304 16.0    7.104    9.563    9.226   10.590
 prepare_preconditioner              11  7.9    0.000    0.000    9.822    9.844
 make_preconditioner                 11  8.9    0.000    0.002    9.822    9.844
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.075    9.515
 mp_waitall_1                    121746 16.5    4.966    7.399    4.966    7.399
 sum_up_and_integrate               110 10.3    0.068    0.076    6.293    6.308
 integrate_v_rspace                 110 11.3    0.002    0.003    6.224    6.242
 init_scf_run                        11  5.9    0.000    0.001    5.914    5.915
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.914    5.915
 make_m2s                          4110 13.4    0.060    0.061    5.446    5.732
 qs_ot_get_p                        110 10.4    0.001    0.001    5.552    5.710
 make_images                       4110 14.4    0.578    0.698    5.305    5.587
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.550    5.558
 calculate_rho_elec                 110  8.6    0.077    0.080    5.550    5.557
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    4.706    4.757
 multiply_cannon_metrocomm3       24660 15.4    0.036    0.037    2.058    4.684
 cp_fm_upper_to_full                 70 13.8    3.237    4.620    3.237    4.620
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.023    4.128
 apply_single                       110 13.6    0.000    0.000    4.023    4.128
 ot_diis_step                        99 11.5    0.011    0.011    4.123    4.124
 dbcsr_complete_redistribute        325 12.2    0.416    0.472    2.656    3.778
 qs_ot_p2m_diag                      48 11.0    0.055    0.064    3.737    3.752
 grid_integrate_task_list           110 12.3    3.250    3.425    3.250    3.425
 multiply_cannon_sync_h2d         24660 15.4    3.177    3.374    3.177    3.374
 jit_kernel_multiply                 12 15.7    1.786    3.280    1.786    3.280
 pw_transfer                       1331 11.6    0.064    0.072    3.237    3.262
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.261    3.261
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    2.115    3.219
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.131    3.160
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.011    3.075
 make_images_data                  4110 15.4    0.047    0.051    2.749    3.034
 calculate_dm_sparse                110  9.5    0.001    0.001    2.962    2.987
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.977    2.978
 wfi_extrapolate                     11  7.9    0.001    0.001    2.970    2.970
 hybrid_alltoall_any               4261 16.3    0.120    0.458    2.289    2.956
 density_rs2pw                      110  9.6    0.004    0.004    2.728    2.926
 calculate_first_density_matrix       1  7.0    0.000    0.003    2.856    2.860
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.738    2.826
 mp_alltoall_i22                    605 13.7    1.640    2.808    1.640    2.808
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.723    2.725
 rs_pw_transfer                     902 11.9    0.010    0.011    2.446    2.725
 cp_fm_cholesky_invert               11 10.9    2.713    2.721    2.713    2.721
 cp_fm_redistribute_end              48 14.0    1.365    2.699    1.366    2.700
 fft_wrap_pw1pw2_140                451 13.1    0.202    0.214    2.656    2.689
 cp_fm_diag_elpa_base                48 14.0    1.253    2.560    1.331    2.670
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.599    2.637
 qs_energies_init_hamiltonians       11  5.9    0.008    0.024    2.516    2.545
 fft3d_ps                          1111 14.6    1.064    1.097    2.435    2.454
 grid_collocate_task_list           110  9.6    2.217    2.356    2.217    2.356
 potential_pw2rs                    110 12.3    0.012    0.013    2.076    2.089
 mp_alltoall_d11v                  2046 13.8    1.775    2.063    1.775    2.063
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.692    1.727
 cp_fm_cholesky_decompose            22 10.9    1.663    1.723    1.663    1.723
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.604    1.713
 acc_transpose_blocks             24660 15.4    0.107    0.110    1.634    1.654
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.607    1.620
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.581    1.595
 multiply_cannon_metrocomm4       20550 15.4    0.057    0.060    0.855    1.576
 mp_allgather_i34                  2055 14.4    0.462    1.561    0.462    1.561
 mp_irecv_dv                      62702 16.1    0.756    1.496    0.756    1.496
 mp_sum_l                          6514 12.8    0.969    1.486    0.969    1.486
 qs_env_update_s_mstruct             11  6.9    0.017    0.138    1.320    1.415
 mp_waitany                       13376 13.8    1.115    1.377    1.115    1.377
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=68.270000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=687.000000, yerr=7.942979
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             829.599744E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65574.
 MP_Allreduce         9838                    640.
 MP_Sync               100
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_comm_split          48
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.405    0.413   56.150   56.151
 qs_mol_dyn_low                       1  2.0    0.003    0.003   55.072   55.081
 qs_forces                           11  3.9    0.002    0.002   55.003   55.003
 qs_energies                         11  4.9    0.001    0.001   51.397   51.402
 scf_env_do_scf                      11  5.9    0.000    0.001   42.687   42.687
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   34.961   34.962
 velocity_verlet                     10  3.0    0.001    0.001   30.577   30.580
 dbcsr_multiply_generic            2055 12.4    0.105    0.107   23.686   23.820
 qs_scf_new_mos                      99  7.5    0.001    0.001   20.916   20.953
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   20.915   20.953
 ot_scf_mini                         99  9.5    0.002    0.002   19.679   19.702
 multiply_cannon                   2055 13.4    0.246    0.266   18.168   19.565
 multiply_cannon_loop              2055 14.4    0.322    0.334   16.805   17.117
 rebuild_ks_matrix                  110  8.3    0.000    0.000   11.421   11.445
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   11.420   11.445
 ot_mini                             99 10.5    0.001    0.001   10.783   10.801
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.181   10.201
 multiply_cannon_multrec           8220 15.4    3.237    4.453    8.155    9.052
 init_scf_loop                       11  6.9    0.000    0.000    7.680    7.681
 mp_waitall_1                    103326 16.6    6.006    7.535    6.006    7.535
 qs_ot_get_derivative                99 11.5    0.001    0.001    6.997    7.020
 sum_up_and_integrate               110 10.3    0.080    0.082    6.149    6.160
 integrate_v_rspace                 110 11.3    0.003    0.003    6.069    6.080
 prepare_preconditioner              11  7.9    0.000    0.000    6.034    6.037
 make_preconditioner                 11  8.9    0.000    0.000    6.034    6.037
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.614    5.686
 dbcsr_mm_accdrv_process          17442 15.9    2.996    3.906    4.790    5.643
 init_scf_run                        11  5.9    0.000    0.001    5.588    5.588
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.587    5.587
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.572    5.585
 calculate_rho_elec                 110  8.6    0.115    0.115    5.572    5.584
 qs_ot_get_p                        110 10.4    0.001    0.001    4.978    5.002
 make_m2s                          4110 13.4    0.038    0.040    4.250    4.542
 make_images                       4110 14.4    0.637    0.688    4.120    4.412
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.018    2.959    4.364
 ot_diis_step                        99 11.5    0.012    0.012    3.765    3.766
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.679    3.727
 apply_single                       110 13.6    0.000    0.000    3.679    3.727
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.598    3.602
 grid_integrate_task_list           110 12.3    3.370    3.457    3.370    3.457
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.299    3.299
 pw_transfer                       1331 11.6    0.065    0.069    3.242    3.251
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.136    3.146
 multiply_cannon_sync_h2d          8220 15.4    2.919    3.035    2.919    3.035
 cp_fm_cholesky_invert               11 10.9    2.906    2.909    2.906    2.909
 make_images_data                  4110 15.4    0.039    0.044    2.425    2.905
 hybrid_alltoall_any               4261 16.3    0.200    0.864    2.356    2.786
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.776    2.777
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.771    2.773
 density_rs2pw                      110  9.6    0.004    0.004    2.629    2.769
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.760    2.762
 cp_fm_redistribute_end              48 14.0    0.707    2.745    0.712    2.746
 calculate_dm_sparse                110  9.5    0.001    0.001    2.693    2.730
 wfi_extrapolate                     11  7.9    0.001    0.001    2.725    2.725
 cp_fm_diag_elpa_base                48 14.0    1.854    2.556    2.025    2.711
 fft_wrap_pw1pw2_140                451 13.1    0.212    0.214    2.674    2.692
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    2.662    2.663
 grid_collocate_task_list           110  9.6    2.328    2.448    2.328    2.448
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.405    2.414
 fft3d_ps                          1111 14.6    1.114    1.164    2.393    2.406
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.374    2.390
 rs_pw_transfer                     902 11.9    0.010    0.010    2.133    2.288
 jit_kernel_multiply                  9 15.6    1.479    2.253    1.479    2.253
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.777    1.991
 potential_pw2rs                    110 12.3    0.015    0.015    1.900    1.905
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    1.783    1.794
 mp_alltoall_d11v                  2046 13.8    1.637    1.747    1.637    1.747
 cp_fm_cholesky_decompose            22 10.9    1.676    1.687    1.676    1.687
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.658    1.670
 mp_allgather_i34                  2055 14.4    0.550    1.652    0.550    1.652
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.504    1.620
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.572    1.577
 dbcsr_complete_redistribute        325 12.2    0.561    0.590    1.445    1.532
 qs_create_task_list                 11  7.9    0.000    0.001    1.226    1.325
 generate_qs_task_list               11  8.9    0.379    0.447    1.226    1.325
 multiply_cannon_metrocomm1        8220 15.4    0.021    0.022    0.779    1.272
 mp_waitany                        9240 13.8    1.035    1.207    1.035    1.207
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.148    1.165
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=56.151000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=780.909091, yerr=11.595097
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.356812E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67104.
 MP_Allreduce         9672                    819.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.023    0.089   85.835   85.836
 qs_mol_dyn_low                       1  2.0    0.003    0.003   85.413   85.422
 qs_forces                           11  3.9    0.002    0.002   85.350   85.350
 qs_energies                         11  4.9    0.002    0.003   81.327   81.328
 scf_env_do_scf                      11  5.9    0.001    0.001   70.509   70.510
 velocity_verlet                     10  3.0    0.001    0.001   54.361   54.367
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   42.435   42.438
 dbcsr_multiply_generic            2055 12.4    0.119    0.121   29.241   29.338
 init_scf_loop                       11  6.9    0.000    0.000   28.003   28.009
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.267   26.322
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.266   26.321
 prepare_preconditioner              11  7.9    0.000    0.000   26.022   26.030
 make_preconditioner                 11  8.9    0.000    0.000   26.022   26.030
 make_full_inverse_cholesky          11  9.9    0.000    0.000   20.416   25.502
 ot_scf_mini                         99  9.5    0.002    0.002   24.513   24.551
 multiply_cannon                   2055 13.4    0.339    0.362   22.280   23.042
 multiply_cannon_loop              2055 14.4    0.342    0.346   20.493   20.838
 cp_fm_upper_to_full                 70 14.2   12.574   17.893   12.574   17.893
 ot_mini                             99 10.5    0.001    0.001   13.768   13.802
 rebuild_ks_matrix                  110  8.3    0.001    0.001   13.008   13.054
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.014   13.008   13.053
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.825   11.867
 dbcsr_complete_redistribute        325 12.2    1.029    1.044    7.182   10.337
 multiply_cannon_multrec           8220 15.4    4.335    4.529    9.993   10.264
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.161    9.309
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.213    9.252
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    5.590    8.717
 mp_waitall_1                     84994 16.7    7.598    8.609    7.598    8.609
 mp_alltoall_i22                    605 13.7    5.213    8.347    5.213    8.347
 init_scf_run                        11  5.9    0.000    0.001    6.576    6.576
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    6.575    6.576
 sum_up_and_integrate               110 10.3    0.150    0.152    6.552    6.568
 integrate_v_rspace                 110 11.3    0.003    0.004    6.401    6.416
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.361    6.396
 calculate_rho_elec                 110  8.6    0.227    0.227    6.361    6.396
 dbcsr_mm_accdrv_process          11614 15.7    3.247    3.533    5.517    5.917
 make_m2s                          4110 13.4    0.043    0.044    5.208    5.708
 qs_ot_get_p                        110 10.4    0.001    0.001    5.602    5.651
 make_images                       4110 14.4    0.880    0.929    5.020    5.520
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.019    4.933    5.444
 cp_fm_cholesky_invert               11 10.9    5.292    5.296    5.292    5.296
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.667    5.156
 apply_single                       110 13.6    0.000    0.000    4.667    5.155
 ot_diis_step                        99 11.5    0.015    0.015    4.531    4.531
 qs_ot_p2m_diag                      48 11.0    0.151    0.156    3.999    4.006
 multiply_cannon_sync_h2d          8220 15.4    3.950    3.960    3.950    3.960
 grid_integrate_task_list           110 12.3    3.652    3.705    3.652    3.705
 pw_transfer                       1331 11.6    0.073    0.074    3.637    3.643
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.641    3.642
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.157    3.609
 hybrid_alltoall_any               4261 16.3    0.256    0.550    2.872    3.578
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.561    3.562
 calculate_dm_sparse                110  9.5    0.001    0.001    3.507    3.539
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.009    3.521    3.528
 make_images_data                  4110 15.4    0.043    0.045    2.799    3.458
 calculate_first_density_matrix       1  7.0    0.000    0.000    3.231    3.236
 wfi_extrapolate                     11  7.9    0.001    0.001    3.231    3.231
 fft_wrap_pw1pw2_140                451 13.1    0.215    0.215    3.059    3.064
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.050    3.054
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.975    2.975
 cp_fm_diag_elpa_base                48 14.0    2.451    2.634    2.973    2.973
 density_rs2pw                      110  9.6    0.004    0.004    2.873    2.887
 fft3d_ps                          1111 14.6    1.263    1.271    2.733    2.738
 grid_collocate_task_list           110  9.6    2.623    2.645    2.623    2.645
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.554    2.567
 jit_kernel_multiply                 10 15.4    2.066    2.433    2.066    2.433
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.368    2.392
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    2.176    2.233
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.084    2.175
 rs_pw_transfer                     902 11.9    0.011    0.011    2.099    2.143
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    2.048    2.068
 cp_fm_cholesky_decompose            22 10.9    2.030    2.049    2.030    2.049
 mp_alltoall_d11v                  2046 13.8    1.981    2.033    1.981    2.033
 potential_pw2rs                    110 12.3    0.021    0.022    2.022    2.026
 qs_create_task_list                 11  7.9    0.000    0.001    1.901    1.949
 generate_qs_task_list               11  8.9    0.739    0.793    1.901    1.949
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.787    1.791
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.742    1.776
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=85.836000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1211.909091, yerr=66.147843
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             627.937280E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175955383376
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4103                  56513.
 MP_Allreduce        11298                    784.
 MP_Sync               170
 MP_Alltoall          2226                2699530.
 MP_SendRecv         24320                  18752.
 MP_ISendRecv        24320                  18752.
 MP_Wait             42476
 MP_comm_split          83
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.029  202.499  202.501
 qs_mol_dyn_low                       1  2.0    0.003    0.004  202.129  202.143
 qs_forces                           11  3.9    0.003    0.004  202.044  202.045
 qs_energies                         11  4.9    0.001    0.002  196.547  196.562
 scf_env_do_scf                      11  5.9    0.001    0.001  179.873  179.877
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  159.217  159.218
 dbcsr_multiply_generic            2507 12.6    0.180    0.195  124.207  125.073
 velocity_verlet                     10  3.0    0.001    0.001  121.943  121.944
 qs_scf_new_mos                     117  7.6    0.001    0.001  120.702  120.995
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  120.702  120.995
 ot_scf_mini                        117  9.6    0.003    0.003  114.096  114.360
 multiply_cannon                   2507 13.6    0.236    0.246  101.084  103.064
 multiply_cannon_loop              2507 14.6    2.112    2.182   98.877  100.491
 ot_mini                            117 10.6    0.001    0.001   65.452   65.713
 multiply_cannon_multrec          60168 15.6   33.203   35.681   41.606   43.592
 qs_ot_get_derivative               117 11.6    0.001    0.001   40.442   40.689
 rebuild_ks_matrix                  128  8.3    0.001    0.001   33.183   33.647
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.017   33.183   33.647
 mp_waitall_1                    267128 16.5   28.214   31.014   28.214   31.014
 qs_ks_update_qs_env                128  7.6    0.001    0.001   29.872   30.295
 multiply_cannon_sync_h2d         60168 15.6   27.324   29.393   27.324   29.393
 qs_ot_get_p                        128 10.4    0.001    0.001   26.739   27.049
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.417   25.339
 apply_single                       128 13.6    0.001    0.001   24.416   25.339
 ot_diis_step                       117 11.6    0.007    0.008   24.642   24.643
 init_scf_loop                       11  6.9    0.000    0.000   20.580   20.581
 qs_ot_p2m_diag                      83 11.4    0.077    0.091   20.043   20.137
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   18.741   18.932
 multiply_cannon_metrocomm3       60168 15.6    0.115    0.119   15.668   17.694
 cp_dbcsr_syevd                      83 12.4    0.004    0.005   17.467   17.469
 prepare_preconditioner              11  7.9    0.000    0.000   15.975   16.024
 make_preconditioner                 11  8.9    0.000    0.000   15.975   16.024
 make_full_inverse_cholesky          11  9.9    0.000    0.000   15.203   15.369
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   14.646   14.650
 cp_fm_redistribute_end              83 14.4   11.581   14.573   11.592   14.576
 cp_fm_diag_elpa_base                83 14.4    2.942   14.195    2.973   14.320
 make_m2s                          5014 13.6    0.104    0.113   13.818   14.208
 make_images                       5014 14.6    0.401    0.418   13.636   14.037
 sum_up_and_integrate               128 10.3    0.089    0.105   13.897   13.925
 integrate_v_rspace                 128 11.3    0.003    0.004   13.808   13.840
 qs_rho_update_rho_low              128  7.7    0.001    0.001   12.543   12.681
 calculate_rho_elec                 128  8.7    0.045    0.063   12.543   12.680
 init_scf_run                        11  5.9    0.000    0.001   12.514   12.514
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.514   12.514
 mp_sum_l                          7870 13.0    8.035    9.185    8.035    9.185
 cp_fm_cholesky_invert               11 10.9    9.106    9.113    9.106    9.113
 wfi_extrapolate                     11  7.9    0.001    0.001    8.998    8.999
 calculate_dm_sparse                128  9.5    0.001    0.001    8.716    8.804
 dbcsr_mm_accdrv_process         124484 16.2    3.289    3.440    7.965    8.516
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    7.904    8.053
 multiply_cannon_metrocomm1       60168 15.6    0.089    0.094    6.034    7.948
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    7.852    7.936
 make_images_data                  5014 15.6    0.068    0.073    6.773    7.715
 grid_integrate_task_list           128 12.3    7.069    7.505    7.069    7.505
 hybrid_alltoall_any               5200 16.5    0.289    2.242    5.940    7.323
 pw_transfer                       1547 11.6    0.074    0.108    6.807    7.080
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.013    6.604    6.849
 density_rs2pw                      128  9.7    0.006    0.007    6.362    6.839
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.636    6.644
 rs_pw_transfer                    1046 11.9    0.017    0.018    5.553    5.958
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.795    5.870
 fft_wrap_pw1pw2_140                523 13.2    0.445    0.511    5.692    5.865
 fft3d_ps                          1291 14.7    2.094    2.568    5.398    5.587
 mp_alltoall_d11v                  2415 14.1    4.233    5.431    4.233    5.431
 grid_collocate_task_list           128  9.7    4.722    5.114    4.722    5.114
 cp_fm_cholesky_decompose            22 10.9    4.661    4.675    4.661    4.675
 mp_sum_d                          4469 12.2    3.641    4.478    3.641    4.478
 potential_pw2rs                    128 12.3    0.009    0.010    4.396    4.433
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=202.501000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=593.818182, yerr=6.886326
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.183246E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               5975232       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.7
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             831.369216E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2406720
 MPI messages size (bytes):
  total size                         4.100942E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.703955E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               70860               2317615104
     32768 < size <=   131072              722992              55511613440
    131072 < size <=  4194304             1375664            1398181724160
   4194304 < size <= 16777216              154704            1463834332048
  16777216 < size                           67584            1181116006400
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4075                  57341.
 MP_Allreduce        11227                    947.
 MP_Sync               170
 MP_Alltoall          1969                4913833.
 MP_SendRecv         12032                  47072.
 MP_ISendRecv        12032                  47072.
 MP_Wait             25916
 MP_comm_split          83
 MP_ISend            11748                 212467.
 MP_IRecv            11748                 212467.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.050  194.781  194.782
 qs_mol_dyn_low                       1  2.0    0.004    0.007  194.375  194.389
 qs_forces                           11  3.9    0.003    0.006  191.330  191.330
 qs_energies                         11  4.9    0.001    0.002  183.795  184.654
 scf_env_do_scf                      11  5.9    0.001    0.001  167.195  167.205
 scf_env_do_scf_inner_loop          117  6.6    0.011    0.066  134.360  134.363
 velocity_verlet                     10  3.0    0.001    0.002  121.697  121.760
 dbcsr_multiply_generic            2507 12.6    0.185    0.190   98.351   99.577
 qs_scf_new_mos                     117  7.6    0.001    0.001   95.959   96.520
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   95.958   96.519
 ot_scf_mini                        117  9.6    0.095    0.732   91.152   91.806
 multiply_cannon                   2507 13.6    0.483    0.534   77.756   81.884
 multiply_cannon_loop              2507 14.6    1.255    1.299   74.681   77.106
 ot_mini                            117 10.6    0.001    0.002   50.537   51.205
 multiply_cannon_multrec          30084 15.6   22.007   27.265   32.127   38.030
 mp_waitall_1                    214728 16.6   24.499   37.201   24.499   37.201
 rebuild_ks_matrix                  128  8.3    0.001    0.001   32.544   33.150
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.021   32.543   33.149
 init_scf_loop                       11  6.9    0.000    0.000   32.748   32.749
 qs_ks_update_qs_env                128  7.6    0.001    0.001   29.346   29.905
 qs_ot_get_derivative               117 11.6    0.006    0.101   28.693   29.415
 prepare_preconditioner              11  7.9    0.000    0.000   28.240   28.325
 make_preconditioner                 11  8.9    0.000    0.000   28.240   28.325
 multiply_cannon_metrocomm3       30084 15.6    0.095    0.100   15.764   28.284
 make_full_inverse_cholesky          11  9.9    0.000    0.000   26.977   27.545
 qs_ot_get_p                        128 10.4    0.001    0.001   22.422   23.106
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   21.890   22.901
 apply_single                       128 13.6    0.001    0.001   21.890   22.901
 multiply_cannon_sync_h2d         30084 15.6   19.346   22.134   19.346   22.134
 ot_diis_step                       117 11.6    0.017    0.034   21.663   21.665
 qs_ot_p2m_diag                      83 11.4    0.188    0.216   17.390   17.424
 cp_fm_cholesky_invert               11 10.9   16.491   16.504   16.491   16.504
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   16.235   16.237
 make_m2s                          5014 13.6    0.089    0.097   14.231   15.565
 make_images                       5014 14.6    1.164    1.348   14.021   15.355
 sum_up_and_integrate               128 10.3    0.116    0.134   14.242   14.268
 integrate_v_rspace                 128 11.3    0.003    0.003   14.125   14.157
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   13.092   13.099
 cp_fm_redistribute_end              83 14.4    7.637   13.011    7.649   13.013
 qs_rho_update_rho_low              128  7.7    0.001    0.001   12.888   12.928
 calculate_rho_elec                 128  8.7    0.088    0.105   12.887   12.928
 cp_fm_diag_elpa_base                83 14.4    5.129   12.495    5.347   12.874
 init_scf_run                        11  5.9    0.000    0.001   11.713   11.714
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   11.713   11.714
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   10.939   11.390
 multiply_cannon_metrocomm4       27577 15.6    0.095    0.109    3.749   10.802
 mp_irecv_dv                      69486 16.3    3.556   10.411    3.556   10.411
 dbcsr_mm_accdrv_process          62242 16.2    4.931    5.809    9.577   10.299
 make_images_data                  5014 15.6    0.066    0.073    8.614   10.176
 hybrid_alltoall_any               5200 16.5    0.343    1.519    7.324    8.931
 wfi_extrapolate                     11  7.9    0.001    0.002    8.345    8.345
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    6.988    7.821
 pw_transfer                       1547 11.6    0.085    0.102    7.657    7.735
 grid_integrate_task_list           128 12.3    7.181    7.524    7.181    7.524
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    7.432    7.496
 density_rs2pw                      128  9.7    0.006    0.006    6.652    7.046
 cp_fm_cholesky_decompose            22 10.9    6.918    7.001    6.918    7.001
 calculate_dm_sparse                128  9.5    0.001    0.002    6.698    6.863
 mp_sum_l                          7870 13.0    4.607    6.857    4.607    6.857
 fft_wrap_pw1pw2_140                523 13.2    0.469    0.515    6.498    6.557
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.175    6.185
 rs_pw_transfer                    1046 11.9    0.014    0.017    5.543    5.968
 fft3d_ps                          1291 14.7    2.782    2.960    5.804    5.838
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.300    5.467
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    5.333    5.397
 grid_collocate_task_list           128  9.7    4.911    5.256    4.911    5.256
 mp_sum_d                          4455 12.2    3.538    5.091    3.538    5.091
 mp_allgather_i34                  2507 14.6    1.642    4.816    1.642    4.816
 potential_pw2rs                    128 12.3    0.014    0.016    4.759    4.777
 mp_alltoall_d11v                  2415 14.1    4.043    4.672    4.043    4.672
 dbcsr_complete_redistribute        395 12.7    0.767    0.856    3.096    3.958
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=194.782000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=791.090909, yerr=2.644189
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022121472       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444702699520       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796573E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.906045E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499744       0.0%      0.0%    100.0%
 number of processed stacks               3951168       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1697.1
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank             930.648064E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1033760
 MPI messages size (bytes):
  total size                         2.695213E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.607194E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              279168              36591108096
    131072 < size <=  4194304              654272             987691483136
   4194304 < size <= 16777216               65184             925172769472
  16777216 < size                           28448             745747251200
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4085                  57194.
 MP_Allreduce        11251                    986.
 MP_Sync               168
 MP_Alltoall          1700                9383497.
 MP_SendRecv          7874                  75008.
 MP_ISendRecv         7874                  75008.
 MP_Wait             21654
 MP_comm_split          82
 MP_ISend            11660                 275234.
 MP_IRecv            11660                 275234.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.081    0.184  176.059  176.060
 qs_mol_dyn_low                       1  2.0    0.004    0.005  175.455  175.493
 qs_forces                           11  3.9    0.003    0.005  175.340  175.351
 qs_energies                         11  4.9    0.006    0.021  168.854  168.867
 scf_env_do_scf                      11  5.9    0.008    0.017  153.245  153.246
 scf_env_do_scf_inner_loop          116  6.6    0.020    0.044  117.604  117.611
 velocity_verlet                     10  3.0    0.004    0.022  111.317  111.319
 dbcsr_multiply_generic            2485 12.5    0.181    0.187   81.126   82.448
 qs_scf_new_mos                     116  7.6    0.001    0.001   81.462   81.827
 qs_scf_loop_do_ot                  116  8.6    0.001    0.002   81.462   81.826
 ot_scf_mini                        116  9.6    0.004    0.006   77.380   77.815
 multiply_cannon                   2485 13.5    0.498    0.523   61.491   65.143
 multiply_cannon_loop              2485 14.5    0.851    0.880   58.141   61.455
 ot_mini                            116 10.6    0.001    0.001   42.233   42.675
 init_scf_loop                       11  6.9    0.233    0.469   35.534   35.535
 mp_waitall_1                    169034 16.6   24.249   34.099   24.249   34.099
 prepare_preconditioner              11  7.9    0.000    0.000   30.919   30.979
 make_preconditioner                 11  8.9    0.000    0.001   30.919   30.979
 rebuild_ks_matrix                  127  8.3    0.001    0.001   29.761   30.359
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.022   29.761   30.359
 make_full_inverse_cholesky          11  9.9    0.000    0.000   28.561   29.970
 qs_ks_update_qs_env                127  7.6    0.001    0.001   26.857   27.405
 multiply_cannon_multrec          19880 15.5   13.476   16.468   22.114   25.053
 multiply_cannon_metrocomm3       19880 15.5    0.060    0.065   14.721   24.830
 qs_ot_get_derivative               116 11.6    0.001    0.002   22.718   23.163
 qs_ot_get_p                        127 10.4    0.001    0.001   20.792   21.310
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   19.653   20.499
 apply_single                       127 13.6    0.001    0.001   19.652   20.498
 ot_diis_step                       116 11.6    0.018    0.021   19.415   19.415
 qs_ot_p2m_diag                      82 11.4    0.262    0.269   16.330   16.336
 multiply_cannon_sync_h2d         19880 15.5   14.238   15.984   14.238   15.984
 make_m2s                          4970 13.5    0.080    0.085   14.418   15.477
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   15.334   15.335
 make_images                       4970 14.5    1.176    1.254   14.186   15.244
 cp_fm_cholesky_invert               11 10.9   14.441   14.451   14.441   14.451
 sum_up_and_integrate               127 10.3    0.131    0.145   14.023   14.047
 integrate_v_rspace                 127 11.3    0.003    0.004   13.891   13.919
 qs_rho_update_rho_low              127  7.7    0.001    0.001   12.911   12.949
 calculate_rho_elec                 127  8.7    0.131    0.147   12.911   12.948
 cp_fm_diag_elpa                     82 13.4    0.000    0.001   12.203   12.205
 cp_fm_redistribute_end              82 14.4    4.609   12.132    4.622   12.134
 cp_fm_diag_elpa_base                82 14.4    7.088   11.550    7.491   12.002
 init_scf_run                        11  5.9    0.000    0.001   10.559   10.560
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.559   10.560
 make_images_data                  4970 15.5    0.061    0.070    8.820   10.292
 hybrid_alltoall_any               5155 16.4    0.425    1.915    7.858    9.525
 multiply_cannon_metrocomm4       17395 15.5    0.061    0.071    3.379    9.196
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    8.873    9.169
 mp_irecv_dv                      49801 16.2    3.258    8.952    3.258    8.952
 dbcsr_mm_accdrv_process          41158 16.2    4.365    5.185    8.093    8.266
 pw_transfer                       1535 11.6    0.084    0.104    7.700    7.814
 grid_integrate_task_list           127 12.3    7.274    7.738    7.274    7.738
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.010    7.479    7.598
 cp_fm_upper_to_full                104 14.5    5.750    7.518    5.750    7.518
 wfi_extrapolate                     11  7.9    0.001    0.001    7.362    7.362
 cp_fm_cholesky_decompose            22 10.9    7.307    7.327    7.307    7.327
 density_rs2pw                      127  9.7    0.006    0.006    6.399    6.873
 dbcsr_complete_redistribute        393 12.7    1.176    1.211    4.812    6.808
 fft_wrap_pw1pw2_140                519 13.2    0.474    0.527    6.551    6.685
 calculate_dm_sparse                127  9.5    0.001    0.002    5.892    6.011
 fft3d_ps                          1281 14.7    2.677    2.908    5.760    5.817
 copy_fm_to_dbcsr                   208 11.6    0.002    0.002    3.627    5.625
 grid_collocate_task_list           127  9.7    5.035    5.604    5.035    5.604
 rs_pw_transfer                    1038 11.9    0.014    0.015    4.970    5.441
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.421    5.427
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.604    5.343
 mp_sum_l                          7804 13.0    3.470    4.929    3.470    4.929
 mp_allgather_i34                  2485 14.5    1.879    4.862    1.879    4.862
 mp_alltoall_d11v                  2401 14.1    4.210    4.795    4.210    4.795
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.521    4.686
 potential_pw2rs                    127 12.3    0.020    0.022    4.468    4.490
 mp_alltoall_i22                    712 14.1    2.205    4.385    2.205    4.385
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    2.338    4.088
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    3.965    3.994
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    3.742    3.745
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=176.060000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=883.818182, yerr=8.663593
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.353788E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               5977344       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               1.148461E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1143192
 MPI messages size (bytes):
  total size                         2.023815E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.770319E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              319024              36042702848
    131072 < size <=  4194304              715736             785529176064
   4194304 < size <= 16777216               70320             665379241840
  16777216 < size                           30720             536870912000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4075                  57332.
 MP_Allreduce        11226                   1068.
 MP_Sync               170
 MP_Alltoall          1712               12503084.
 MP_SendRecv          5888                  75008.
 MP_ISendRecv         5888                  75008.
 MP_Wait             22442
 MP_comm_split          83
 MP_ISend            14952                 244818.
 MP_IRecv            14952                 244818.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.056  188.308  188.309
 qs_mol_dyn_low                       1  2.0    0.004    0.005  187.899  187.911
 qs_forces                           11  3.9    0.009    0.055  187.769  187.789
 qs_energies                         11  4.9    0.003    0.008  180.727  180.738
 scf_env_do_scf                      11  5.9    0.011    0.029  163.736  163.746
 velocity_verlet                     10  3.0    0.001    0.002  123.498  123.502
 scf_env_do_scf_inner_loop          117  6.6    0.098    0.262  116.850  116.861
 dbcsr_multiply_generic            2507 12.6    0.190    0.197   80.899   81.575
 qs_scf_new_mos                     117  7.6    0.001    0.001   81.010   81.255
 qs_scf_loop_do_ot                  117  8.6    0.017    0.052   81.009   81.255
 ot_scf_mini                        117  9.6    0.021    0.053   76.494   76.816
 multiply_cannon                   2507 13.6    0.553    0.600   54.773   57.294
 multiply_cannon_loop              2507 14.6    1.187    1.206   51.120   52.819
 init_scf_loop                       11  6.9    0.026    0.077   46.748   46.749
 ot_mini                            117 10.6    0.077    0.609   42.954   43.289
 prepare_preconditioner              11  7.9    0.000    0.000   42.066   42.092
 make_preconditioner                 11  8.9    0.000    0.001   42.066   42.092
 make_full_inverse_cholesky          11  9.9    0.000    0.000   35.760   40.664
 multiply_cannon_multrec          30084 15.6   14.237   19.069   26.157   30.692
 rebuild_ks_matrix                  128  8.3    0.001    0.001   29.068   29.279
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.024   29.067   29.278
 mp_waitall_1                    147882 16.7   17.491   27.971   17.491   27.971
 qs_ks_update_qs_env                128  7.6    0.001    0.001   26.292   26.485
 qs_ot_get_derivative               117 11.6    0.002    0.006   22.735   23.076
 make_m2s                          5014 13.6    0.095    0.100   20.800   21.849
 make_images                       5014 14.6    1.993    2.280   20.493   21.539
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   19.623   20.104
 apply_single                       128 13.6    0.001    0.001   19.623   20.103
 ot_diis_step                       117 11.6    0.019    0.028   20.016   20.093
 qs_ot_get_p                        128 10.4    0.001    0.001   19.776   20.072
 cp_fm_cholesky_invert               11 10.9   16.161   16.170   16.161   16.170
 cp_fm_upper_to_full                105 14.7   10.898   16.115   10.898   16.115
 qs_ot_p2m_diag                      83 11.4    0.344    0.391   15.529   15.582
 multiply_cannon_metrocomm3       30084 15.6    0.048    0.050    6.230   15.304
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   14.248   14.250
 sum_up_and_integrate               128 10.3    0.141    0.153   14.041   14.069
 integrate_v_rspace                 128 11.3    0.003    0.004   13.900   13.933
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.114   13.158
 calculate_rho_elec                 128  8.7    0.175    0.190   13.113   13.157
 make_images_data                  5014 15.6    0.065    0.069   11.310   12.776
 dbcsr_complete_redistribute        395 12.7    1.521    1.634    8.987   12.691
 multiply_cannon_sync_h2d         30084 15.6   11.698   12.662   11.698   12.662
 dbcsr_mm_accdrv_process          62264 16.2    7.320    8.363   11.496   12.045
 hybrid_alltoall_any               5200 16.5    0.524    2.188   10.009   11.929
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    7.583   11.257
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   11.109   11.112
 init_scf_run                        11  5.9    0.000    0.001   11.038   11.039
 scf_env_initial_rho_setup           11  6.9    0.002    0.005   11.037   11.039
 cp_fm_redistribute_end              83 14.4    1.920   11.023    1.936   11.029
 cp_fm_diag_elpa_base                83 14.4    8.485   10.409    9.058   10.884
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    6.284    9.916
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.404    9.608
 mp_alltoall_i22                    716 14.1    5.558    9.231    5.558    9.231
 pw_transfer                       1547 11.6    0.085    0.100    7.872    7.938
 grid_integrate_task_list           128 12.3    7.520    7.843    7.520    7.843
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    7.648    7.716
 cp_fm_cholesky_decompose            22 10.9    7.541    7.648    7.541    7.648
 wfi_extrapolate                     11  7.9    0.001    0.003    7.636    7.636
 multiply_cannon_metrocomm4       25070 15.6    0.076    0.087    2.730    7.200
 mp_irecv_dv                      76098 16.2    2.585    6.934    2.585    6.934
 fft_wrap_pw1pw2_140                523 13.2    0.481    0.494    6.751    6.836
 calculate_dm_sparse                128  9.5    0.001    0.001    6.538    6.631
 density_rs2pw                      128  9.7    0.006    0.006    6.260    6.601
 fft3d_ps                          1291 14.7    2.793    2.863    5.883    5.938
 grid_collocate_task_list           128  9.7    5.204    5.561    5.204    5.561
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.475    5.532
 mp_alltoall_d11v                  2415 14.1    4.839    5.412    4.839    5.412
 rs_pw_transfer                    1046 11.9    0.014    0.015    4.573    4.899
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.419    4.555
 mp_sum_l                          7870 13.0    3.412    4.525    3.412    4.525
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.377    4.452
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    4.427    4.428
 potential_pw2rs                    128 12.3    0.022    0.023    4.240    4.258
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    4.168    4.230
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=188.309000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1078.545455, yerr=22.431162
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430456039424       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1962800054272       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986255912960       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992003932160       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613072052224       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239176077312       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239176077312       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.233020E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.910120E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806383904       0.0%      0.0%    100.0%
 number of processed stacks               1976928       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3442.9
 marketing flops                   145.650931E+12
 -------------------------------------------------------------------------------
 # multiplications                           2529
 max memory usage/rank               1.487864E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  242784
 MPI messages size (bytes):
  total size                         1.341806E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.526748E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              115008              60297314304
   4194304 < size <= 16777216              105840             554906419200
  16777216 < size                           20352             726592466352
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         9010                     51.
 MP_Alltoall          9724                 794507.
 MP_ISend            40420                2100460.
 MP_IRecv            40420                2099564.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4043                  57630.
 MP_Allreduce        11104                   1171.
 MP_Sync                88
 MP_Alltoall          1724               18848034.
 MP_SendRecv          3870                 122880.
 MP_ISendRecv         3870                 122880.
 MP_Wait             16244
 MP_ISend            10760                 423501.
 MP_IRecv            10760                 423501.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.038  172.026  172.027
 qs_mol_dyn_low                       1  2.0    0.003    0.003  171.653  171.665
 qs_forces                           11  3.9    0.002    0.003  171.547  171.553
 qs_energies                         11  4.9    0.001    0.002  164.257  164.264
 scf_env_do_scf                      11  5.9    0.001    0.001  147.007  147.018
 velocity_verlet                     10  3.0    0.001    0.001  112.418  112.422
 scf_env_do_scf_inner_loop          118  6.6    0.003    0.008  111.295  111.296
 qs_scf_new_mos                     118  7.6    0.001    0.001   75.534   75.686
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   75.533   75.686
 dbcsr_multiply_generic            2529 12.6    0.184    0.190   75.210   75.648
 ot_scf_mini                        118  9.6    0.004    0.004   71.043   71.123
 multiply_cannon                   2529 13.6    0.593    0.635   55.414   59.897
 multiply_cannon_loop              2529 14.6    0.449    0.460   50.683   51.496
 ot_mini                            118 10.6    0.001    0.001   40.004   40.085
 init_scf_loop                       11  6.9    0.000    0.000   35.562   35.563
 mp_waitall_1                    126876 16.7   25.952   32.886   25.952   32.886
 prepare_preconditioner              11  7.9    0.000    0.000   31.500   31.527
 make_preconditioner                 11  8.9    0.000    0.000   31.500   31.527
 make_full_inverse_cholesky          11  9.9    0.000    0.000   29.311   29.558
 rebuild_ks_matrix                  129  8.3    0.001    0.001   28.670   28.749
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.017   28.669   28.748
 qs_ks_update_qs_env                129  7.6    0.001    0.001   26.218   26.295
 multiply_cannon_multrec          10116 15.6   10.507   14.368   18.164   21.049
 ot_diis_step                       118 11.6    0.020    0.021   20.014   20.014
 qs_ot_get_derivative               118 11.6    0.002    0.002   19.919   19.997
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   19.548   19.853
 apply_single                       129 13.6    0.001    0.001   19.548   19.853
 multiply_cannon_metrocomm3       10116 15.6    0.023    0.025   12.359   19.562
 make_m2s                          5058 13.6    0.066    0.071   16.246   18.773
 make_images                       5058 14.6    2.331    2.708   15.936   18.467
 qs_ot_get_p                        129 10.4    0.001    0.001   18.211   18.303
 cp_fm_cholesky_invert               11 10.9   17.955   17.961   17.955   17.961
 qs_ot_p2m_diag                      84 11.4    0.502    0.508   14.267   14.283
 sum_up_and_integrate               129 10.3    0.183    0.192   14.033   14.082
 integrate_v_rspace                 129 11.3    0.004    0.004   13.850   13.908
 qs_rho_update_rho_low              129  7.7    0.001    0.001   13.118   13.156
 calculate_rho_elec                 129  8.7    0.260    0.271   13.117   13.155
 cp_dbcsr_syevd                      84 12.4    0.005    0.005   13.129   13.130
 make_images_data                  5058 15.6    0.054    0.062    9.896   12.401
 multiply_cannon_sync_h2d         10116 15.6   11.774   12.262   11.774   12.262
 hybrid_alltoall_any               5245 16.5    0.817    3.820    9.719   12.005
 init_scf_run                        11  5.9    0.000    0.001   10.479   10.479
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.479   10.479
 cp_fm_diag_elpa                     84 13.4    0.000    0.000   10.061   10.074
 cp_fm_diag_elpa_base                84 14.4    9.800    9.892   10.049   10.062
 grid_integrate_task_list           129 12.3    7.880    8.145    7.880    8.145
 dbcsr_mm_accdrv_process          20934 16.1    2.745    3.807    7.279    7.982
 qs_ot_get_derivative_diag           78 12.4    0.002    0.003    7.925    7.980
 cp_fm_cholesky_decompose            22 10.9    7.848    7.961    7.848    7.961
 pw_transfer                       1559 11.6    0.084    0.090    7.740    7.762
 multiply_cannon_metrocomm1       10116 15.6    0.029    0.030    4.437    7.727
 fft_wrap_pw1pw2                   1301 12.7    0.010    0.010    7.517    7.544
 mp_allgather_i34                  2529 14.6    2.922    7.514    2.922    7.514
 wfi_extrapolate                     11  7.9    0.001    0.001    7.207    7.207
 fft_wrap_pw1pw2_140                527 13.2    0.505    0.524    6.631    6.663
 calculate_dm_sparse                129  9.5    0.001    0.001    6.536    6.617
 density_rs2pw                      129  9.7    0.005    0.006    5.917    6.195
 grid_collocate_task_list           129  9.7    5.522    5.766    5.522    5.766
 fft3d_ps                          1301 14.7    2.742    2.824    5.667    5.709
 dbcsr_complete_redistribute        397 12.7    2.135    2.222    5.285    5.655
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    5.200    5.201
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.106    5.115
 mp_alltoall_d11v                  2429 14.1    4.681    5.023    4.681    5.023
 rs_pw_transfer                    1054 12.0    0.013    0.013    4.016    4.310
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.035    4.055
 potential_pw2rs                    129 12.3    0.027    0.028    4.011    4.027
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.572    3.878
 multiply_cannon_metrocomm4        7587 15.6    0.024    0.027    1.856    3.822
 copy_fm_to_dbcsr                   210 11.7    0.002    0.002    3.473    3.790
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.741    3.774
 mp_irecv_dv                      29102 15.9    1.818    3.753    1.818    3.753
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    3.709    3.742
 copy_dbcsr_to_fm                   187 11.8    0.004    0.004    3.522    3.603
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=172.027000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1389.727273, yerr=40.400475
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430454546432       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1975684956160       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986255912960       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992006770688       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613065416704       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239182565376       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239182565376       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.245913E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.787674E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806580192       0.0%      0.0%    100.0%
 number of processed stacks               1982496       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3433.3
 marketing flops                   145.663816E+12
 -------------------------------------------------------------------------------
 # multiplications                           2535
 max memory usage/rank               3.062534E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  101400
 MPI messages size (bytes):
  total size                         1.145171E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.293599E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               45888              35634806784
   4194304 < size <= 16777216               44720             382939955200
  16777216 < size                           10176             726592540656
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4049                  58480.
 MP_Allreduce        11119                   1510.
 MP_Sync                88
 MP_Alltoall          1724               36993632.
 MP_SendRecv          1806                 218624.
 MP_ISendRecv         1806                 218624.
 MP_Wait              9876
 MP_ISend             6456                1080169.
 MP_IRecv             6456                1080169.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.025    0.064  287.095  287.096
 qs_mol_dyn_low                       1  2.0    0.003    0.003  286.567  286.578
 qs_forces                           11  3.9    0.003    0.003  286.476  286.478
 qs_energies                         11  4.9    0.002    0.002  277.957  277.974
 scf_env_do_scf                      11  5.9    0.001    0.001  256.477  256.493
 velocity_verlet                     10  3.0    0.001    0.002  208.009  208.017
 scf_env_do_scf_inner_loop          118  6.6    0.003    0.008  133.144  133.146
 init_scf_loop                       11  6.9    0.000    0.000  123.081  123.086
 prepare_preconditioner              11  7.9    0.000    0.000  118.419  118.450
 make_preconditioner                 11  8.9    0.000    0.000  118.419  118.450
 make_full_inverse_cholesky          11  9.9    0.000    0.000   94.966  115.545
 qs_scf_new_mos                     118  7.6    0.001    0.001   90.987   91.100
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   90.987   91.099
 ot_scf_mini                        118  9.6    0.004    0.004   86.155   86.190
 dbcsr_multiply_generic            2535 12.6    0.215    0.222   83.811   84.431
 cp_fm_upper_to_full                106 14.8   52.728   75.113   52.728   75.113
 multiply_cannon                   2535 13.6    0.717    0.751   59.437   59.961
 multiply_cannon_loop              2535 14.6    0.477    0.487   55.695   57.116
 ot_mini                            118 10.6    0.001    0.001   44.842   44.877
 dbcsr_complete_redistribute        397 12.7    4.010    4.056   29.054   41.569
 copy_fm_to_dbcsr                   210 11.7    0.002    0.002   25.704   38.240
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000   23.408   35.866
 rebuild_ks_matrix                  129  8.3    0.001    0.001   33.630   33.676
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.018   33.629   33.675
 mp_alltoall_i22                    720 14.1   21.219   33.621   21.219   33.621
 cp_fm_cholesky_invert               11 10.9   33.199   33.205   33.199   33.205
 mp_waitall_1                    104820 16.8   28.378   32.284   28.378   32.284
 qs_ks_update_qs_env                129  7.6    0.001    0.001   31.424   31.476
 qs_ot_get_p                        129 10.4    0.001    0.001   25.651   25.673
 qs_ot_get_derivative               118 11.6    0.002    0.002   24.323   24.359
 qs_ot_p2m_diag                      84 11.4    0.890    0.895   21.659   21.689
 make_m2s                          5070 13.6    0.077    0.079   19.708   20.738
 multiply_cannon_metrocomm3       10140 15.6    0.025    0.025   19.247   20.568
 ot_diis_step                       118 11.6    0.022    0.022   20.487   20.487
 make_images                       5070 14.6    3.800    3.864   19.226   20.259
 cp_dbcsr_syevd                      84 12.4    0.006    0.006   19.912   19.913
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   19.654   19.902
 apply_single                       129 13.6    0.001    0.001   19.654   19.901
 multiply_cannon_multrec          10140 15.6   10.619   12.330   18.125   18.259
 cp_fm_diag_elpa                     84 13.4    0.000    0.000   16.709   16.710
 cp_fm_diag_elpa_base                84 14.4   12.321   13.888   16.705   16.705
 multiply_cannon_sync_h2d         10140 15.6   15.784   15.820   15.784   15.820
 sum_up_and_integrate               129 10.3    0.325    0.326   15.720   15.814
 integrate_v_rspace                 129 11.3    0.004    0.004   15.395   15.489
 qs_rho_update_rho_low              129  7.7    0.001    0.001   14.955   14.974
 calculate_rho_elec                 129  8.7    0.486    0.487   14.955   14.974
 hybrid_alltoall_any               5257 16.5    1.314    3.061   10.545   12.219
 make_images_data                  5070 15.6    0.062    0.066   10.424   12.193
 init_scf_run                        11  5.9    0.000    0.001   11.646   11.647
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   11.646   11.647
 qs_ot_get_derivative_diag           78 12.4    0.002    0.002    9.554    9.587
 dbcsr_mm_accdrv_process          20958 16.1    3.946    5.778    7.264    9.124
 cp_fm_cholesky_decompose            22 10.9    8.799    8.845    8.799    8.845
 grid_integrate_task_list           129 12.3    8.602    8.776    8.602    8.776
 wfi_extrapolate                     11  7.9    0.001    0.001    8.591    8.591
 pw_transfer                       1559 11.6    0.091    0.092    8.295    8.302
 fft_wrap_pw1pw2                   1301 12.7    0.011    0.011    8.060    8.067
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    7.869    7.870
 fft_wrap_pw1pw2_140                527 13.2    0.545    0.549    7.161    7.167
 mp_alltoall_d11v                  2429 14.1    6.864    7.004    6.864    7.004
 calculate_dm_sparse                129  9.5    0.001    0.001    6.666    6.776
 grid_collocate_task_list           129  9.7    6.386    6.426    6.386    6.426
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.284    6.379
 fft3d_ps                          1301 14.7    2.794    2.821    6.087    6.096
 copy_dbcsr_to_fm                   187 11.8    0.004    0.004    5.993    6.073
 density_rs2pw                      129  9.7    0.005    0.005    5.995    6.030
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=287.096000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2722.454545, yerr=191.300983
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.261855E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255669.
 MP_Allreduce         3059                   6274.
 MP_Sync                 4
 MP_Alltoall            54               44549627.
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.022    0.045   84.591   84.591
 qs_energies                          1  2.0    0.000    0.000   84.135   84.149
 ls_scf                               1  3.0    0.000    0.000   83.221   83.234
 dbcsr_multiply_generic             111  6.7    0.015    0.016   72.463   72.651
 multiply_cannon                    111  7.7    0.017    0.020   55.813   57.127
 multiply_cannon_loop               111  8.7    0.208    0.222   52.425   53.740
 ls_scf_main                          1  4.0    0.000    0.000   52.085   52.086
 density_matrix_trs4                  2  5.0    0.002    0.003   46.658   46.730
 ls_scf_init_scf                      1  4.0    0.000    0.002   28.122   28.123
 ls_scf_init_matrix_S                 1  5.0    0.000    0.002   27.061   27.116
 mp_waitall_1                     11031 10.9   22.338   25.050   22.338   25.050
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   24.947   24.968
 multiply_cannon_multrec           2664  9.7    8.147    8.942   15.578   17.359
 multiply_cannon_sync_h2d          2664  9.7   13.559   15.034   13.559   15.034
 make_m2s                           222  7.7    0.008    0.011   13.004   13.407
 make_images                        222  8.7    0.099    0.109   12.982   13.387
 multiply_cannon_metrocomm1        2664  9.7    0.009    0.010    9.652   11.799
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.458    8.621
 make_images_data                   222  9.7    0.004    0.005    7.556    8.160
 dbcsr_mm_accdrv_process           4760 10.4    0.516    0.622    7.051    8.019
 hybrid_alltoall_any                227 10.6    0.215    1.839    6.482    8.011
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.336    7.249    6.336    7.249
 calculate_norms                   4752  9.8    5.521    6.164    5.521    6.164
 apply_matrix_preconditioner          6  5.3    0.000    0.001    5.022    5.149
 mp_sum_l                           807  5.4    3.120    4.576    3.120    4.576
 make_images_sizes                  222  9.7    0.000    0.000    0.757    3.603
 mp_alltoall_i44                    222 10.7    0.756    3.602    0.756    3.602
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.324    3.459
 multiply_cannon_metrocomm4        2442  9.7    0.012    0.014    2.063    3.254
 mp_irecv_dv                       6231 10.9    2.045    3.225    2.045    3.225
 arnoldi_extremal                     4  6.8    0.000    0.000    3.192    3.216
 arnoldi_normal_ev                    4  7.8    0.001    0.004    3.192    3.216
 build_subspace                      16  8.4    0.009    0.012    3.098    3.100
 ls_scf_post                          1  4.0    0.000    0.000    3.014    3.028
 ls_scf_store_result                  1  5.0    0.000    0.000    2.834    2.876
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.340    2.767
 dbcsr_merge_single_wm              555 10.7    0.457    0.571    2.332    2.759
 make_images_pack                   222  9.7    2.207    2.645    2.208    2.647
 dbcsr_matrix_vector_mult           304  9.0    0.003    0.010    2.305    2.547
 dbcsr_sort_data                    658 11.4    2.132    2.504    2.132    2.504
 dbcsr_matrix_vector_mult_local     304 10.0    2.062    2.453    2.064    2.455
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.186    2.246
 buffer_matrices_ensure_size        222  8.7    1.762    2.067    1.762    2.067
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.732    1.733
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.722    1.724
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    1.722    1.724
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=84.591000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1142.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.119414E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  10339.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.025    0.041   91.643   91.644
 qs_energies                          1  2.0    0.000    0.000   91.216   91.220
 ls_scf                               1  3.0    0.000    0.000   89.904   89.908
 dbcsr_multiply_generic             111  6.7    0.015    0.016   75.831   76.216
 multiply_cannon                    111  7.7    0.028    0.039   53.424   57.685
 ls_scf_main                          1  4.0    0.000    0.000   55.278   55.283
 multiply_cannon_loop               111  8.7    0.115    0.123   50.136   53.521
 density_matrix_trs4                  2  5.0    0.002    0.003   49.649   49.855
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.868   30.869
 mp_waitall_1                      9105 10.9   21.503   30.000   21.503   30.000
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   29.713   29.830
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.299   27.311
 multiply_cannon_multrec           1332  9.7   12.976   16.265   22.324   26.667
 multiply_cannon_metrocomm3        1332  9.7    0.007    0.008   11.948   20.398
 make_m2s                           222  7.7    0.006    0.007   15.564   16.301
 make_images                        222  8.7    1.568    1.916   15.534   16.272
 dbcsr_mm_accdrv_process           4041 10.4    0.252    0.448    8.950   10.593
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.546   10.144    8.546   10.144
 make_images_data                   222  9.7    0.004    0.004    9.000   10.045
 hybrid_alltoall_any                227 10.6    0.522    2.447    8.364    9.550
 mp_sum_l                           807  5.4    5.536    8.635    5.536    8.635
 multiply_cannon_metrocomm4        1221  9.7    0.006    0.008    3.240    7.601
 mp_irecv_dv                       3311 11.0    3.220    7.539    3.220    7.539
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.332    6.978
 calculate_norms                   2376  9.8    5.981    6.608    5.981    6.608
 multiply_cannon_sync_h2d          1332  9.7    4.769    5.825    4.769    5.825
 apply_matrix_preconditioner          6  5.3    0.001    0.001    5.018    5.214
 arnoldi_extremal                     4  6.8    0.000    0.000    4.594    4.612
 arnoldi_normal_ev                    4  7.8    0.001    0.005    4.594    4.612
 build_subspace                      16  8.4    0.014    0.021    4.342    4.345
 ls_scf_post                          1  4.0    0.000    0.000    3.758    3.764
 dbcsr_matrix_vector_mult           304  9.0    0.005    0.017    3.120    3.352
 ls_scf_store_result                  1  5.0    0.000    0.000    3.185    3.277
 dbcsr_matrix_vector_mult_local     304 10.0    2.738    3.220    2.740    3.221
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.507    2.591
 mp_allgather_i34                   111  8.7    0.982    2.489    0.982    2.489
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.185    2.431
 make_images_pack                   222  9.7    2.024    2.421    2.026    2.423
 dbcsr_sort_data                    436 11.2    1.847    2.097    1.847    2.097
 dbcsr_data_new                    4174 10.1    1.612    1.853    1.612    1.853
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=91.644000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1762.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.746638E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265470.
 MP_Allreduce         3058                  11181.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.085    0.459   94.886   94.887
 qs_energies                          1  2.0    0.000    0.000   93.870   93.877
 ls_scf                               1  3.0    0.000    0.001   92.469   92.476
 dbcsr_multiply_generic             111  6.7    0.017    0.023   77.228   77.464
 ls_scf_main                          1  4.0    0.000    0.000   57.818   57.823
 multiply_cannon                    111  7.7    0.089    0.402   52.927   56.976
 multiply_cannon_loop               111  8.7    0.100    0.113   49.332   53.048
 density_matrix_trs4                  2  5.0    0.002    0.003   51.926   52.137
 mp_waitall_1                      7281 11.0   23.978   33.822   23.978   33.822
 ls_scf_init_scf                      1  4.0    0.000    0.001   31.073   31.076
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   29.912   29.971
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.527   27.548
 multiply_cannon_multrec            888  9.7   12.613   15.120   21.301   24.334
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   11.040   22.676
 make_m2s                           222  7.7    0.006    0.008   17.344   18.556
 make_images                        222  8.7    1.970    2.292   17.305   18.517
 make_images_data                   222  9.7    0.004    0.005    9.941   11.016
 hybrid_alltoall_any                227 10.6    0.620    2.866    9.565   11.012
 dbcsr_mm_accdrv_process           3754 10.4    0.240    0.419    8.193    9.454
 mp_sum_l                           807  5.4    5.410    9.230    5.410    9.230
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.822    9.036    7.822    9.036
 multiply_cannon_sync_h2d           888  9.7    6.038    7.346    6.038    7.346
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.191    7.226
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.619    7.224
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.458    6.843
 mp_irecv_dv                       2335 11.1    2.443    6.792    2.443    6.792
 arnoldi_extremal                     4  6.8    0.000    0.000    5.094    5.120
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.094    5.120
 apply_matrix_preconditioner          6  5.3    0.001    0.001    4.941    5.103
 build_subspace                      16  8.4    0.014    0.020    4.790    4.797
 calculate_norms                   1584  9.8    4.264    4.661    4.264    4.661
 mp_allgather_i34                   111  8.7    1.393    3.980    1.393    3.980
 dbcsr_matrix_vector_mult           304  9.0    0.005    0.017    3.419    3.748
 ls_scf_post                          1  4.0    0.002    0.009    3.578    3.586
 dbcsr_matrix_vector_mult_local     304 10.0    3.011    3.579    3.013    3.581
 ls_scf_store_result                  1  5.0    0.000    0.000    3.311    3.402
 ls_scf_dm_to_ks                      2  5.0    0.000    0.001    2.732    2.827
 make_images_sizes                  222  9.7    0.000    0.000    1.089    2.208
 mp_alltoall_i44                    222 10.7    1.089    2.208    1.089    2.208
 make_images_pack                   222  9.7    1.812    2.143    1.815    2.146
 dbcsr_sort_data                    325 11.1    1.877    2.138    1.877    2.138
 dbcsr_data_release                9322 10.9    1.325    2.023    1.325    2.023
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=94.887000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2170.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.332948E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  13371.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.078    0.110   97.729   97.730
 qs_energies                          1  2.0    0.000    0.000   96.862   96.867
 ls_scf                               1  3.0    0.000    0.001   95.151   95.156
 dbcsr_multiply_generic             111  6.7    0.017    0.022   78.267   78.477
 ls_scf_main                          1  4.0    0.000    0.000   58.405   58.406
 multiply_cannon                    111  7.7    0.058    0.115   51.362   56.470
 density_matrix_trs4                  2  5.0    0.002    0.003   52.421   52.565
 multiply_cannon_loop               111  8.7    0.115    0.127   46.290   50.003
 ls_scf_init_scf                      1  4.0    0.001    0.002   33.497   33.499
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   31.972   32.056
 mp_waitall_1                      6369 11.0   22.208   29.595   22.208   29.595
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   29.477   29.488
 multiply_cannon_multrec           1332  9.7   14.256   17.706   22.268   26.282
 make_m2s                           222  7.7    0.007    0.010   21.085   22.535
 make_images                        222  8.7    3.142    3.592   21.034   22.486
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.004    8.818   17.105
 make_images_data                   222  9.7    0.004    0.004   11.737   13.508
 hybrid_alltoall_any                227 10.6    0.798    3.790   11.053   12.741
 dbcsr_mm_accdrv_process           3641 10.4    0.197    0.400    7.643    9.182
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.265    8.767    7.265    8.767
 mp_sum_l                           807  5.4    4.256    7.589    4.256    7.589
 multiply_cannon_sync_h2d          1332  9.7    5.484    6.248    5.484    6.248
 multiply_cannon_metrocomm4        1110  9.7    0.004    0.006    2.096    6.048
 mp_irecv_dv                       3229 10.9    2.073    5.968    2.073    5.968
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.292    5.895
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.482    5.711
 arnoldi_extremal                     4  6.8    0.000    0.000    5.183    5.199
 arnoldi_normal_ev                    4  7.8    0.002    0.005    5.183    5.199
 build_subspace                      16  8.4    0.015    0.021    4.847    4.854
 apply_matrix_preconditioner          6  5.3    0.001    0.001    4.526    4.718
 mp_allgather_i34                   111  8.7    2.203    4.574    2.203    4.574
 calculate_norms                   2376  9.8    4.195    4.546    4.195    4.546
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.017    3.562    3.847
 dbcsr_matrix_vector_mult_local     304 10.0    3.174    3.671    3.176    3.673
 dbcsr_sort_data                    658 11.4    3.042    3.479    3.042    3.479
 ls_scf_post                          1  4.0    0.000    0.001    3.248    3.253
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.776    3.243
 dbcsr_merge_single_wm              555 10.7    0.529    0.663    2.767    3.235
 ls_scf_store_result                  1  5.0    0.000    0.000    2.979    3.052
 ls_scf_dm_to_ks                      2  5.0    0.001    0.010    2.886    2.928
 dbcsr_data_release               10477 10.7    1.590    2.402    1.590    2.402
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.238    2.239
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.214    2.216
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.002    2.214    2.216
 dbcsr_finalize                     304  7.8    0.049    0.061    1.809    1.977
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=97.730000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2724.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.638441E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265558.
 MP_Allreduce         3049                  15663.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.041    0.056  100.004  100.005
 qs_energies                          1  2.0    0.000    0.000   99.194   99.200
 ls_scf                               1  3.0    0.000    0.000   97.252   97.256
 dbcsr_multiply_generic             111  6.7    0.017    0.018   78.444   78.708
 ls_scf_main                          1  4.0    0.000    0.000   62.229   62.229
 multiply_cannon                    111  7.7    0.086    0.137   55.690   60.368
 density_matrix_trs4                  2  5.0    0.002    0.003   55.239   55.313
 multiply_cannon_loop               111  8.7    0.069    0.077   51.227   53.555
 mp_waitall_1                      5436 11.0   27.116   32.595   27.116   32.595
 ls_scf_init_scf                      1  4.0    0.000    0.000   31.051   31.056
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   29.878   29.916
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.689   27.702
 multiply_cannon_multrec            444  9.7   13.999   16.420   21.151   23.748
 make_m2s                           222  7.7    0.004    0.005   17.862   20.304
 make_images                        222  8.7    3.723    4.403   17.800   20.244
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   11.653   16.579
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001    6.059   15.348
 make_images_data                   222  9.7    0.003    0.004   10.205   12.474
 hybrid_alltoall_any                227 10.6    0.788    3.760    9.964   12.401
 multiply_cannon_sync_h2d           444  9.7    6.584    8.124    6.584    8.124
 dbcsr_mm_accdrv_process           3003 10.4    0.163    0.345    6.856    8.009
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.542    7.664    6.542    7.664
 mp_allgather_i34                   111  8.7    2.692    7.039    2.692    7.039
 arnoldi_extremal                     4  6.8    0.000    0.000    5.813    5.829
 arnoldi_normal_ev                    4  7.8    0.002    0.005    5.813    5.829
 build_subspace                      16  8.4    0.015    0.020    5.421    5.433
 apply_matrix_preconditioner          6  5.3    0.001    0.001    4.614    4.796
 mp_sum_l                           807  5.4    2.925    4.600    2.925    4.600
 dbcsr_matrix_vector_mult           304  9.0    0.007    0.017    4.144    4.356
 dbcsr_matrix_vector_mult_local     304 10.0    3.672    4.151    3.674    4.153
 ls_scf_post                          1  4.0    0.000    0.000    3.972    3.977
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.611    3.913
 mp_irecv_dv                       1241 11.2    1.593    3.893    1.593    3.893
 calculate_norms                    792  9.8    3.534    3.704    3.534    3.704
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.048    3.513
 make_images_sizes                  222  9.7    0.000    0.000    1.005    3.513
 mp_alltoall_i44                    222 10.7    1.005    3.513    1.005    3.513
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.370    3.454
 ls_scf_store_result                  1  5.0    0.000    0.000    3.411    3.453
 dbcsr_finalize                     304  7.8    0.062    0.078    2.207    2.331
 dbcsr_merge_all                    275  8.9    0.473    0.537    2.057    2.166
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=100.005000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3640.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/523d85c309ba47d334e69c8546862257e289ce38_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.760713E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284111.
 MP_Allreduce         3043                  21950.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.076    0.094  107.976  107.976
 qs_energies                          1  2.0    0.000    0.000  106.405  106.418
 ls_scf                               1  3.0    0.000    0.000  103.398  103.412
 dbcsr_multiply_generic             111  6.7    0.023    0.027   77.775   77.883
 ls_scf_main                          1  4.0    0.000    0.000   65.530   65.531
 density_matrix_trs4                  2  5.0    0.002    0.003   56.687   56.739
 multiply_cannon                    111  7.7    0.099    0.159   49.918   51.776
 multiply_cannon_loop               111  8.7    0.067    0.069   46.428   47.409
 ls_scf_init_scf                      1  4.0    0.000    0.000   34.199   34.200
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   32.881   32.890
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   30.049   30.057
 mp_waitall_1                      4527 11.1   22.074   25.974   22.074   25.974
 make_m2s                           222  7.7    0.005    0.005   23.870   24.884
 make_images                        222  8.7    4.589    4.977   23.764   24.775
 multiply_cannon_multrec            444  9.7   17.866   18.539   22.553   23.101
 hybrid_alltoall_any                227 10.6    1.660    3.631   12.910   15.608
 make_images_data                   222  9.7    0.003    0.003   13.118   15.580
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.377   10.876
 multiply_cannon_sync_h2d           444  9.7    8.847    8.894    8.847    8.894
 arnoldi_extremal                     4  6.8    0.000    0.000    7.282    7.294
 arnoldi_normal_ev                    4  7.8    0.004    0.012    7.282    7.294
 build_subspace                      16  8.4    0.026    0.037    6.736    6.747
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.027    5.397    5.544
 apply_matrix_preconditioner          6  5.3    0.001    0.001    5.042    5.303
 dbcsr_matrix_vector_mult_local     304 10.0    4.977    5.285    4.980    5.288
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    4.784    4.874
 dbcsr_mm_accdrv_process           1814 10.4    0.234    0.307    4.512    4.646
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.210    4.348    4.210    4.348
 ls_scf_post                          1  4.0    0.000    0.000    3.670    3.683
 mp_allgather_i34                   111  8.7    1.143    3.581    1.143    3.581
 make_images_sizes                  222  9.7    0.000    0.000    1.421    3.446
 mp_alltoall_i44                    222 10.7    1.421    3.446    1.421    3.446
 ls_scf_store_result                  1  5.0    0.000    0.000    3.423    3.432
 calculate_norms                    792  9.8    3.241    3.280    3.241    3.280
 dbcsr_finalize                     304  7.8    0.082    0.089    3.087    3.130
 qs_energies_init_hamiltonians        1  3.0    0.001    0.001    2.976    2.976
 dbcsr_merge_all                    275  8.9    0.890    0.918    2.872    2.914
 dbcsr_complete_redistribute          5  7.6    1.441    1.471    2.780    2.900
 dbcsr_data_release               12724 10.6    2.332    2.858    2.332    2.858
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.419    2.545
 dbcsr_sort_data                    325 11.1    2.441    2.500    2.441    2.500
 mp_sum_l                           807  5.4    1.650    2.435    1.650    2.435
 dbcsr_new_transposed                 4  7.5    0.244    0.256    2.278    2.291
 dbcsr_frobenius_norm                74  6.6    2.054    2.129    2.188    2.231
 dbcsr_add_d                        103  6.2    0.000    0.000    2.131    2.202
 dbcsr_add_anytype                  103  7.2    0.859    0.890    2.130    2.201
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.189    2.191
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=107.976000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=6860.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: 523d85c309ba47d334e69c8546862257e289ce38
Summary: empty
Status: OK