=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: 3afb8bf709f96407a7e9894ade69f93aea585134


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1, Cray-FFTW 3.3.8.10,
#              COSMA 2.6.2, ELPA 2021.11.002, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.0.0, LIBVORI 220621, LIBXSMM 1.17, PLUMED 2.8.0,
#              SIRIUS 7.3.2, SPGLIB 1.16.2
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed as default.
#        Replace or adapt the "module add" commands below if needed.
#
# Author: Matthias Krack (19.10.2022)
#
# Bootstrap section (executed by bash only).
#
# This file is read by two programs:
#   - Sourced from bash, the lines below adjust the loaded environment
#     modules, save them as the module collection cp2k_gpu_gnu_psmp, run
#     install_cp2k_toolchain.sh in tools/toolchain, source the resulting
#     setup file, print the make commands to use and finally "return", so
#     bash never reaches the make syntax further down.
#   - Read by make, the comment opened on the next line is continued by the
#     backslash that ends every following line up to "return", so make
#     ignores the whole section.
# When editing: every line except the final "return" has to keep its trailing
# backslash, and comments should stay out of the section, because bash ends a
# comment at the newline regardless of the backslash in front of it.
#
# Optional argument: ${1} selects the gcc module version (default 9.3.0).
#
# The change into tools/toolchain is relative to the current directory and is
# checked: if it fails the section stops with status 1 instead of going on to
# "cd ../.." and leaving the shell two directories above where it started.
# The setup path is passed to printf as an argument and quoted for "source",
# so a "%" or a blank in ${PWD} is handled literally.
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   cd tools/toolchain || { echo "ERROR: Directory tools/toolchain not found in ${PWD}"; return 1; }; \
   ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed; \
   cd ../..; \
   printf "Sourcing %s ... " "${PWD}/tools/toolchain/install/setup"; \
   source "${PWD}/tools/toolchain/install/setup"; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   return

# Set options
#
# DO_CHECKS and USE_ACC are yes/no switches. Each USE_<package> variable holds
# the version of that package; the matching section further down is skipped
# when the variable is empty or commented out (as USE_QUIP is here).
DO_CHECKS := no
USE_ACC := yes

USE_COSMA := 2.6.2
USE_ELPA := 2021.11.002
USE_LIBINT := 2.6.0
USE_LIBPEXSI := 1.2.0
USE_LIBVORI := 220621
USE_LIBXC := 6.0.0
USE_LIBXSMM := 1.17
USE_PLUMED := 2.8.0
#USE_QUIP := b4336484fb65b0e73211a8f920ae4361c7c353fd
USE_SIRIUS := 7.3.2
USE_SPGLIB := 1.16.2

# Versions of the packages that are only needed for SIRIUS
HDF5_VER := 1.12.0
LIBVDWXC_VER := 0.4.0
SPFFT_VER := 1.0.6
SPLA_VER := 1.5.4

# Versions of the packages that are only needed for LIBPEXSI
SCOTCH_VER := 6.0.0
SUPERLU_VER := 6.1.0

# LMAX is part of the LIBINT directory name, MAX_CONTR ends up in
# -D__MAX_CONTR
LMAX := 5
MAX_CONTR := 4

# Offload settings
GPUVER := P100
OFFLOAD_TARGET := cuda

# Compilers, linker and archiver
CC := cc
CXX := CC
FC := ftn
LD := ftn
OFFLOAD_CC := nvcc
AR := ar -r

# Base C flags. No -march option appears here because cc, CC, and ftn already
# add the proper one.
CFLAGS := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g

# Root of the toolchain installation; the package sections below build their
# include and library paths on it.
INSTALL_PATH := $(PWD)/tools/toolchain/install

# Preprocessor defines that are set unconditionally. The package sections
# append their own -D flags to this list.
DFLAGS := -D__parallel \
          -D__SCALAPACK \
          -D__FFTW3 \
          -D__MPI_VERSION=3 \
          -D__MAX_CONTR=$(strip $(MAX_CONTR))

# DO_CHECKS=yes adds the __CHECK_DIAG define.
ifeq (yes,$(DO_CHECKS))
   DFLAGS += -D__CHECK_DIAG
endif

# USE_ACC=yes adds the DBCSR accelerator and CUDA offload defines.
# __NO_OFFLOAD_PW is added as well: possibly no performance gain with PW_CUDA
# currently.
ifeq (yes,$(USE_ACC))
   DFLAGS += -D__DBCSR_ACC -D__OFFLOAD_CUDA -D__NO_OFFLOAD_PW
endif

# PLUMED: adds -D__PLUMED2 and links the static libplumed.a.
# This is also the only place in the arch file where USE_GSL is set, so the
# GSL section further down is active only together with PLUMED.
ifneq (,$(USE_PLUMED))
   USE_PLUMED := $(strip $(USE_PLUMED))
   USE_GSL := 2.7
   DFLAGS += -D__PLUMED2
   PLUMED_LIB := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   LIBS += $(PLUMED_LIB)/libplumed.a
endif

# ELPA: adds the ELPA include directories, -D__ELPA and the static libelpa.a.
# The include and library paths contain a TARGET sub-directory.
ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
# TARGET follows USE_ACC, as the SIRIUS and COSMA sections do. It used to be
# fixed to nvidia, so the GPU build and -D__ELPA_NVIDIA_GPU were selected even
# with USE_ACC switched off.
# NOTE(review): "cpu" is assumed to be the directory name of the non-GPU ELPA
# build in the toolchain install tree -- confirm before building with
# USE_ACC=no.
   ifeq ($(USE_ACC), yes)
      TARGET         := nvidia
   else
      TARGET         := cpu
   endif
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   DFLAGS         += -D__ELPA
# The GPU define is only wanted for the nvidia build.
   ifeq ($(TARGET), nvidia)
      DFLAGS         += -D__ELPA_NVIDIA_GPU
   endif
   LIBS           += $(ELPA_LIB)/libelpa.a
endif

# QUIP: adds the QUIP include directory, -D__QUIP and six static libraries
# (QUIP core, libatoms and four FoX libraries) in the order listed.
ifneq (,$(USE_QUIP))
   USE_QUIP := $(strip $(USE_QUIP))
   QUIP_LIB := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   QUIP_INC := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   DFLAGS += -D__QUIP
   CFLAGS += -I$(QUIP_INC)
   LIBS += $(addprefix $(QUIP_LIB)/,libquip_core.a libatoms.a)
   LIBS += $(addprefix $(QUIP_LIB)/,libFoX_sax.a libFoX_common.a libFoX_utils.a libFoX_fsys.a)
endif

# LIBPEXSI: needs PEXSI itself plus SuperLU_DIST and SCOTCH. Adds the three
# include directories, -D__LIBPEXSI and the static libraries in the order
# PEXSI, SuperLU_DIST, SCOTCH.
ifneq (,$(USE_LIBPEXSI))
   # PEXSI
   USE_LIBPEXSI := $(strip $(USE_LIBPEXSI))
   LIBPEXSI_INC := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   LIBPEXSI_LIB := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
   # SCOTCH
   SCOTCH_VER := $(strip $(SCOTCH_VER))
   SCOTCH_INC := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   SCOTCH_LIB := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib
   # SuperLU_DIST
   SUPERLU_VER := $(strip $(SUPERLU_VER))
   SUPERLU_INC := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
   SUPERLU_LIB := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib

   CFLAGS += -I$(LIBPEXSI_INC)
   CFLAGS += -I$(SCOTCH_INC)
   CFLAGS += -I$(SUPERLU_INC)
   DFLAGS += -D__LIBPEXSI
   LIBS += $(LIBPEXSI_LIB)/libpexsi.a $(SUPERLU_LIB)/libsuperlu_dist.a
   LIBS += $(addprefix $(SCOTCH_LIB)/,libptscotchparmetis.a libptscotch.a libptscotcherr.a)
   LIBS += $(addprefix $(SCOTCH_LIB)/,libscotchmetis.a libscotch.a)
endif

# LIBVORI: adds -D__LIBVORI and links the static libvori.a.
ifneq (,$(USE_LIBVORI))
   USE_LIBVORI := $(strip $(USE_LIBVORI))
   DFLAGS += -D__LIBVORI
   LIBVORI_LIB := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   LIBS += $(LIBVORI_LIB)/libvori.a
endif

# LIBXC: adds the include directory, -D__LIBXC and the two static libraries,
# libxcf03.a ahead of libxc.a.
ifneq (,$(USE_LIBXC))
   USE_LIBXC := $(strip $(USE_LIBXC))
   LIBXC_LIB := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   LIBXC_INC := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   DFLAGS += -D__LIBXC
   CFLAGS += -I$(LIBXC_INC)
   LIBS += $(addprefix $(LIBXC_LIB)/,libxcf03.a libxc.a)
endif

# LIBINT: the install directory name carries both the LIBINT version and
# LMAX. Adds the include directory, -D__LIBINT and the static libint2.a.
ifneq (,$(USE_LIBINT))
   LMAX := $(strip $(LMAX))
   USE_LIBINT := $(strip $(USE_LIBINT))
   libint_prefix := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)
   LIBINT_INC := $(libint_prefix)/include
   LIBINT_LIB := $(libint_prefix)/lib
   DFLAGS += -D__LIBINT
   CFLAGS += -I$(LIBINT_INC)
   LIBS += $(LIBINT_LIB)/libint2.a
endif

# SPGLIB: adds the include directory, -D__SPGLIB and the static libsymspg.a.
ifneq (,$(USE_SPGLIB))
   USE_SPGLIB := $(strip $(USE_SPGLIB))
   spglib_prefix := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)
   SPGLIB_INC := $(spglib_prefix)/include
   SPGLIB_LIB := $(spglib_prefix)/lib
   DFLAGS += -D__SPGLIB
   CFLAGS += -I$(SPGLIB_INC)
   LIBS += $(SPGLIB_LIB)/libsymspg.a
endif

# LIBXSMM: adds the include directory, -D__LIBXSMM and the two static
# libraries, libxsmmf.a ahead of libxsmm.a.
ifneq (,$(USE_LIBXSMM))
   USE_LIBXSMM := $(strip $(USE_LIBXSMM))
   LIBXSMM_LIB := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   LIBXSMM_INC := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   DFLAGS += -D__LIBXSMM
   CFLAGS += -I$(LIBXSMM_INC)
   LIBS += $(addprefix $(LIBXSMM_LIB)/,libxsmmf.a libxsmm.a)
endif

# SIRIUS: pulls in HDF5, libvdwxc, SpFFT and SpLA as well. With USE_ACC=yes
# the SpFFT and SpLA libraries and the SIRIUS include and library directories
# come from a "cuda" sub-directory and -D__OFFLOAD_GEMM is added; otherwise
# the plain directories are used.
ifneq (,$(USE_SIRIUS))
   USE_SIRIUS := $(strip $(USE_SIRIUS))
   HDF5_VER := $(strip $(HDF5_VER))
   LIBVDWXC_VER := $(strip $(LIBVDWXC_VER))
   SPFFT_VER := $(strip $(SPFFT_VER))
   SPLA_VER := $(strip $(SPLA_VER))

   # Path suffix that selects the CUDA flavour of the libraries
   ifeq (yes,$(USE_ACC))
      DFLAGS += -D__OFFLOAD_GEMM
      sirius_variant := /cuda
   else
      sirius_variant :=
   endif

   HDF5_LIB := $(INSTALL_PATH)/hdf5-$(HDF5_VER)/lib
   LIBVDWXC_INC := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SPFFT_INC := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
   SPFFT_LIB := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib$(sirius_variant)
   SPLA_INC := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla
   SPLA_LIB := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib$(sirius_variant)
   SIRIUS_INC := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include$(sirius_variant)
   SIRIUS_LIB := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib$(sirius_variant)

   CFLAGS += -I$(LIBVDWXC_INC) -I$(SPFFT_INC) -I$(SPLA_INC) -I$(SIRIUS_INC)
   DFLAGS += -D__HDF5 -D__LIBVDWXC -D__SPFFT -D__SPLA -D__SIRIUS
   LIBS += $(SIRIUS_LIB)/libsirius.a \
           $(SPLA_LIB)/libspla.a \
           $(SPFFT_LIB)/libspfft.a \
           $(LIBVDWXC_LIB)/libvdwxc.a \
           $(HDF5_LIB)/libhdf5.a
endif

# COSMA: with USE_ACC=yes the version string gets the suffix "-cuda", which
# changes the install directory that is used. Adds the include directory,
# -D__COSMA and five static libraries in the order listed.
ifneq (,$(USE_COSMA))
   USE_COSMA := $(strip $(USE_COSMA))
   ifeq (yes,$(USE_ACC))
      USE_COSMA := $(USE_COSMA)-cuda
   endif
   COSMA_LIB := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   COSMA_INC := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   DFLAGS += -D__COSMA
   CFLAGS += -I$(COSMA_INC)
   LIBS += $(addprefix $(COSMA_LIB)/,libcosma_prefixed_pxgemm.a libcosma.a)
   LIBS += $(addprefix $(COSMA_LIB)/,libcosta_prefixed_scalapack.a libcosta.a)
   LIBS += $(COSMA_LIB)/libTiled-MM.a
endif

# GSL: adds the include directory, -D__GSL and the static libgsl.a.
ifneq (,$(USE_GSL))
   USE_GSL := $(strip $(USE_GSL))
   gsl_prefix := $(INSTALL_PATH)/gsl-$(USE_GSL)
   GSL_INC := $(gsl_prefix)/include
   GSL_LIB := $(gsl_prefix)/lib
   DFLAGS += -D__GSL
   CFLAGS += -I$(GSL_INC)
   LIBS += $(GSL_LIB)/libgsl.a
endif

# Final flag sets per compiler.
# The C flags receive every preprocessor define collected so far.
CFLAGS += $(DFLAGS)

# C++ flags are the C flags plus the language standard.
CXXFLAGS := $(CFLAGS) -std=c++11

# Offload compiler flags carry the defines but not the C flags.
OFFLOAD_FLAGS := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# Fortran flags start from the C flags. -fallow-argument-mismatch is inserted
# only when the major version printed by "gcc -dumpversion" is greater than 9.
FCFLAGS := $(CFLAGS)
ifeq (yes,$(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes))
   FCFLAGS += -fallow-argument-mismatch
endif
FCFLAGS += -fbacktrace \
           -ffree-form \
           -ffree-line-length-none \
           -fno-omit-frame-pointer \
           -std=f2008

# Linker flags are the Fortran flags. When CUDA_HOME is set, its lib64
# directory is added both as a library search path and as a runtime search
# path.
LDFLAGS := $(FCFLAGS)
ifneq (,$(CUDA_HOME))
   CUDA_LIB := $(CUDA_HOME)/lib64
   LDFLAGS += -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA and system libraries go last on the link line, after all package
# libraries added above.
LIBS += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt \
        -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/01
 job id: 42369379
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/02
 job id: 42369380
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/03
 job id: 42369381
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/04
 job id: 42369382
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/05
 job id: 42369384
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/06
 job id: 42369385
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/07
 job id: 42369386
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/08
 job id: 42369387
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/09
 job id: 42369388
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/10
 job id: 42369389
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/11
 job id: 42369390
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/12
 job id: 42369391
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/13
 job id: 42369392
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/14
 job id: 42369393
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/15
 job id: 42369395
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/16
 job id: 42369396
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/17
 job id: 42369397
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/18
 job id: 42369398
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/19
 job id: 42369400
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/20
 job id: 42369401
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/21
 job id: 42369402
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/22
 job id: 42369403
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/23
 job id: 42369404
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/24
 job id: 42369405
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/25
 job id: 42369407
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/26
 job id: 42369410
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          344                      9.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.040    0.042  134.076  134.077
 farming_run                          1  2.0  133.240  133.242  134.028  134.029
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.455632E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              228                1113141.
 MP_Allreduce          489                2263609.
 MP_Sync                27
 MP_Alltoall            38                9316958.
 MP_SendRecv            30                 829726.
 MP_ISendRecv          135                 235435.
 MP_Wait               281
 MP_comm_split           8
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.025  114.889  114.889
 qs_energies                          1  2.0    0.000    0.000  114.673  114.676
 mp2_main                             1  3.0    0.000    0.000  112.815  112.818
 mp2_gpw_main                         1  4.0    0.019    0.025  111.905  111.908
 mp2_ri_gpw_compute_in                1  5.0    0.171    0.172   92.884   93.307
 mp2_ri_gpw_compute_in_loop           1  6.0    0.004    0.005   55.458   55.881
 mp2_eri_3c_integrate_gpw           272  7.0    0.153    0.168   41.747   47.139
 get_2c_integrals                     1  6.0    0.000    0.000   36.768   37.254
 integrate_v_rspace                 273  8.0    0.432    0.446   25.119   30.271
 pw_transfer                       6555 10.6    0.379    0.390   27.328   27.835
 grid_integrate_task_list           273  9.0   20.938   26.580   20.938   26.580
 fft_wrap_pw1pw2                   5465 11.4    0.044    0.047   26.008   26.520
 fft_wrap_pw1pw2_100               2178 12.4    1.167    1.226   23.569   24.090
 compute_2c_integrals                 1  7.0    0.002    0.002   19.245   19.246
 rpa_ri_compute_en                    1  5.0    0.000    0.000   18.911   19.095
 compute_2c_integrals_loop_lm         1  8.0    0.003    0.003   18.835   18.945
 mp2_eri_2c_integrate_gpw             1  9.0    2.393    2.435   18.832   18.944
 cp_fm_cholesky_decompose            12  8.2   17.531   18.038   17.531   18.038
 cholesky_decomp                      1  7.0    0.000    0.000   16.360   16.867
 fft3d_s                           5443 13.4   16.113   16.596   16.135   16.616
 ao_to_mo_and_store_B_mult_1        272  7.0   10.874   15.612   10.874   15.612
 calculate_wavefunction             272  8.0    5.406    5.555   12.538   13.161
 rpa_num_int                          1  6.0    0.000    0.001   10.667   10.677
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.626   10.649
 calc_mat_Q                           8  8.0    0.000    0.000    9.394    9.514
 contract_S_to_Q                      8  9.0    0.000    0.000    8.815    8.932
 calc_potential_gpw                 544  9.5    0.005    0.006    8.282    8.636
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.393    8.486
 parallel_gemm_fm_cosma              14 10.1    8.393    8.486    8.393    8.486
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.002    0.002    8.196    8.438
 potential_pw2rs                    545 10.0    0.108    0.110    7.646    8.316
 create_integ_mat                     1  6.0    0.014    0.028    7.727    7.737
 collocate_single_gaussian          272 10.0    0.040    0.043    7.423    7.649
 array2fm                             1  7.0    0.000    0.000    6.692    7.086
 pw_scatter_s                      2720 13.7    4.458    4.670    4.458    4.670
 pw_gather_s                       2722 13.2    3.898    4.290    3.898    4.290
 array2fm_buffer_send                 1  8.0    3.009    3.170    3.009    3.170
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=111.905711, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2728.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          344                     10.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.028    0.038  395.015  395.017
 farming_run                          1  2.0  394.056  394.077  394.979  394.982
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827141120       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788822       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.215984E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              703                 408373.
 MP_Allreduce         1825                  23678.
 MP_Sync                38
 MP_Alltoall            77               23103487.
 MP_SendRecv          2171                2843495.
 MP_ISendRecv         1739                 144022.
 MP_Wait              2051
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.050  209.123  209.124
 qs_energies                          1  2.0    0.000    0.000  208.876  208.906
 scf_env_do_scf                       1  3.0    0.000    0.000  105.743  105.743
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  104.905  104.912
 rebuild_ks_matrix                    4  6.0    0.000    0.000  104.903  104.911
 qs_ks_build_kohn_sham_matrix         4  7.0    0.058    0.066  104.903  104.911
 hfx_ks_matrix                        4  8.0    0.001    0.001  104.523  104.526
 integrate_four_center                4  9.0    0.143    0.456  104.522  104.526
 mp2_main                             1  3.0    0.000    0.000  102.849  102.879
 mp2_gpw_main                         1  4.0    0.035    0.056  102.005  102.035
 integrate_four_center_main           4 10.0    0.106    0.525   96.563   98.222
 integrate_four_center_bin          263 11.0   96.458   98.215   96.458   98.215
 init_scf_loop                        1  4.0    0.000    0.000   91.272   91.274
 mp2_ri_gpw_compute_in                1  5.0    0.093    0.182   74.955   75.928
 mp2_ri_gpw_compute_in_loop           1  6.0    0.003    0.035   54.511   55.499
 mp2_eri_3c_integrate_gpw            91  7.0    0.145    0.160   42.305   47.297
 integrate_v_rspace                  95  8.0    0.398    0.570   28.681   33.477
 pw_transfer                       2240 10.6    0.146    0.165   29.901   30.363
 ao_to_mo_and_store_B_mult_1         91  7.0   10.523   29.523   10.523   29.523
 fft_wrap_pw1pw2                   1868 11.4    0.018    0.020   28.907   29.336
 grid_integrate_task_list            95  9.0   23.972   28.992   23.972   28.992
 mp2_ri_gpw_compute_en                1  5.0    0.067    0.123   26.889   28.584
 fft_wrap_pw1pw2_100                730 12.4    1.271    1.425   26.622   27.046
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.837    1.908   25.244   25.254
 get_2c_integrals                     1  6.0    0.005    0.025   20.327   20.361
 compute_2c_integrals                 1  7.0    0.008    0.027   19.309   19.326
 compute_2c_integrals_loop_lm         1  8.0    0.004    0.014   18.890   19.179
 mp2_eri_2c_integrate_gpw             1  9.0    1.739    1.813   18.886   19.177
 fft3d_s                           1823 13.4   18.398   18.714   18.412   18.728
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.000   14.469   14.470
 calculate_wavefunction              91  8.0    2.016    2.055    9.739    9.996
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.557    0.584    8.847    9.335
 potential_pw2rs                    186 10.0    0.033    0.035    8.581    8.960
 local_gemm                         172  8.0    8.290    8.751    8.290    8.751
 mp2_ri_gpw_compute_en_comm          22  7.0    0.504    0.522    8.174    8.692
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.018    8.268    8.487
 calc_potential_gpw                 182  9.5    0.002    0.002    7.992    8.254
 collocate_single_gaussian           91 10.0    0.027    0.112    7.877    8.030
 mp_sendrecv_dm3                   2068  8.0    6.197    6.684    6.197    6.684
 mp2_ri_gpw_compute_en_ener         172  7.0    6.341    6.420    6.341    6.420
 pw_gather_s                        912 13.2    4.884    5.401    4.884    5.401
 mp_sync                             38 10.4    2.545    5.292    2.545    5.292
 pw_scatter_s                       910 13.7    3.986    4.282    3.986    4.282
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=101.989826, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1502.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             452.055040E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62385.
 MP_Allreduce        10249                    271.
 MP_Sync               580
 MP_Alltoall          2083                1361960.
 MP_ISendRecv        45220                   5520.
 MP_Wait             60486
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.060   56.006   56.011
 qs_mol_dyn_low                       1  2.0    0.020    0.074   55.478   55.485
 qs_forces                           11  3.9    0.002    0.003   55.312   55.333
 qs_energies                         11  4.9    0.002    0.005   53.698   53.729
 scf_env_do_scf                      11  5.9    0.000    0.001   47.235   47.235
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   44.743   44.743
 dbcsr_multiply_generic            2286 12.5    0.093    0.097   34.546   34.983
 qs_scf_new_mos                     108  7.5    0.000    0.000   33.527   33.803
 qs_scf_loop_do_ot                  108  8.5    0.000    0.000   33.526   33.802
 ot_scf_mini                        108  9.5    0.002    0.002   31.853   32.037
 multiply_cannon                   2286 13.5    0.179    0.187   26.221   27.822
 velocity_verlet                     10  3.0    0.001    0.002   27.804   27.805
 multiply_cannon_loop              2286 14.5    1.505    1.576   25.318   26.946
 ot_mini                            108 10.5    0.001    0.001   20.274   20.510
 qs_ot_get_derivative               108 11.5    0.001    0.001   17.179   17.363
 mp_waitall_1                    267858 16.1    9.885   15.819    9.885   15.819
 multiply_cannon_metrocomm3       54864 15.5    0.066    0.071    6.206   13.078
 multiply_cannon_multrec          54864 15.5    4.132    6.381    7.424   10.779
 rebuild_ks_matrix                  119  8.3    0.000    0.000    8.664    8.810
 qs_ks_build_kohn_sham_matrix       119  9.3    0.011    0.019    8.663    8.810
 qs_ks_update_qs_env                119  7.6    0.001    0.001    7.626    7.759
 mp_sum_l                          7207 12.9    5.770    7.557    5.770    7.557
 qs_ot_get_p                        119 10.4    0.001    0.001    6.763    7.069
 multiply_cannon_sync_h2d         54864 15.5    5.747    7.031    5.747    7.031
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    5.892    6.338
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    5.867    5.971
 sum_up_and_integrate               119 10.3    0.012    0.015    5.103    5.116
 integrate_v_rspace                 119 11.3    0.002    0.002    5.091    5.104
 init_scf_run                        11  5.9    0.000    0.001    5.027    5.028
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.027    5.028
 qs_rho_update_rho_low              119  7.7    0.000    0.001    4.836    5.000
 calculate_rho_elec                 119  8.7    0.011    0.016    4.836    5.000
 dbcsr_mm_accdrv_process          76910 16.1    1.202    1.855    3.216    4.477
 rs_pw_transfer                     974 11.9    0.012    0.013    3.539    3.817
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    3.670    3.739
 multiply_cannon_metrocomm1       54864 15.5    0.052    0.058    2.012    3.565
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.945    3.179
 apply_single                       119 13.6    0.000    0.000    2.945    3.179
 calculate_dm_sparse                119  9.5    0.000    0.000    2.894    3.030
 density_rs2pw                      119  9.7    0.004    0.004    2.799    2.950
 ot_diis_step                       108 11.5    0.006    0.006    2.809    2.810
 cp_dbcsr_syevd                      50 12.0    0.002    0.003    2.769    2.769
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.592    2.708
 calculate_first_density_matrix       1  7.0    0.000    0.004    2.603    2.609
 jit_kernel_multiply                 13 15.8    1.953    2.536    1.953    2.536
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    2.472    2.474
 init_scf_loop                       11  6.9    0.001    0.003    2.467    2.467
 potential_pw2rs                    119 12.3    0.004    0.004    2.390    2.439
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.416    2.417
 cp_fm_redistribute_end              50 14.0    2.185    2.382    2.189    2.384
 wfi_extrapolate                     11  7.9    0.001    0.001    2.351    2.351
 make_m2s                          4572 13.5    0.053    0.056    2.277    2.350
 cp_fm_diag_elpa_base                50 14.0    0.192    2.321    0.193    2.331
 pw_transfer                       1439 11.6    0.051    0.055    2.216    2.325
 make_images                       4572 14.5    0.133    0.139    2.195    2.268
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.007    2.141    2.252
 mp_sum_d                          4129 12.0    1.535    2.209    1.535    2.209
 acc_transpose_blocks             54864 15.5    0.227    0.245    1.730    2.160
 grid_integrate_task_list           119 12.3    2.004    2.121    2.004    2.121
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.994    2.048
 fft3d_ps                          1201 14.6    0.359    0.463    1.923    2.029
 mp_alltoall_d11v                  2130 13.8    1.747    1.929    1.747    1.929
 fft_wrap_pw1pw2_140                487 13.2    0.077    0.090    1.628    1.739
 mp_waitany                       12084 13.8    1.371    1.614    1.371    1.614
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.407    1.426
 grid_collocate_task_list           119  9.7    1.291    1.366    1.291    1.366
 dbcsr_dot_sd                      1205 11.9    0.047    0.058    0.839    1.232
 prepare_preconditioner              11  7.9    0.000    0.000    1.187    1.224
 make_preconditioner                 11  8.9    0.000    0.000    1.187    1.224
 make_images_sizes                 4572 15.5    0.004    0.005    0.946    1.179
 mp_alltoall_i44                   4572 16.5    0.942    1.175    0.942    1.175
 mp_alltoall_z22v                  1201 16.6    1.084    1.166    1.084    1.166
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.121    1.165
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=56.011000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=430.636364, yerr=1.822722
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             487.428096E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62664.
 MP_Allreduce        10226                    305.
 MP_Sync               104
 MP_Alltoall          2060                 695101.
 MP_ISendRecv        33558                  37093.
 MP_Wait             40318
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.030   38.527   38.529
 qs_mol_dyn_low                       1  2.0    0.013    0.032   38.224   38.232
 qs_forces                           11  3.9    0.010    0.030   38.158   38.168
 qs_energies                         11  4.9    0.001    0.002   36.431   36.454
 scf_env_do_scf                      11  5.9    0.000    0.001   31.334   31.335
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   28.799   28.799
 dbcsr_multiply_generic            2286 12.5    0.100    0.103   21.381   21.770
 qs_scf_new_mos                     108  7.5    0.001    0.001   19.779   20.028
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   19.778   20.027
 ot_scf_mini                        108  9.5    0.002    0.003   18.868   19.042
 velocity_verlet                     10  3.0    0.001    0.002   18.384   18.385
 multiply_cannon                   2286 13.5    0.206    0.215   16.274   17.863
 multiply_cannon_loop              2286 14.5    0.905    0.980   15.130   16.612
 ot_mini                            108 10.5    0.001    0.001   11.802   12.036
 mp_waitall_1                    217478 16.2    6.078   11.223    6.078   11.223
 multiply_cannon_metrocomm3       27432 15.5    0.068    0.071    4.115    9.564
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.344    9.518
 multiply_cannon_multrec          27432 15.5    1.975    4.335    5.858    8.621
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.186    7.321
 qs_ks_build_kohn_sham_matrix       119  9.3    0.014    0.029    7.186    7.320
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.325    6.446
 dbcsr_mm_accdrv_process          47894 16.0    2.935    5.220    3.815    5.653
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.602    4.450
 qs_ot_get_p                        119 10.4    0.001    0.001    4.159    4.393
 sum_up_and_integrate               119 10.3    0.024    0.027    4.203    4.210
 integrate_v_rspace                 119 11.3    0.002    0.002    4.179    4.187
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.046    4.126
 apply_single                       119 13.6    0.000    0.000    3.046    4.126
 mp_sum_l                          7207 12.9    2.064    4.017    2.064    4.017
 init_scf_run                        11  5.9    0.000    0.001    3.873    3.874
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.873    3.873
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.648    3.674
 calculate_rho_elec                 119  8.7    0.021    0.024    3.647    3.673
 make_m2s                          4572 13.5    0.052    0.053    2.592    2.833
 multiply_cannon_sync_h2d         27432 15.5    2.190    2.824    2.190    2.824
 make_images                       4572 14.5    0.199    0.237    2.505    2.744
 rs_pw_transfer                     974 11.9    0.010    0.011    2.633    2.741
 qs_ot_p2m_diag                      50 11.0    0.008    0.012    2.701    2.720
 init_scf_loop                       11  6.9    0.000    0.000    2.511    2.512
 ot_diis_step                       108 11.5    0.011    0.011    2.403    2.404
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.348    2.350
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.259    2.260
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.144    2.236
 calculate_dm_sparse                119  9.5    0.000    0.001    2.052    2.129
 density_rs2pw                      119  9.7    0.004    0.004    2.008    2.097
 potential_pw2rs                    119 12.3    0.006    0.007    1.937    1.952
 jit_kernel_multiply                 10 16.0    0.828    1.951    0.828    1.951
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.922    1.924
 grid_integrate_task_list           119 12.3    1.837    1.920    1.837    1.920
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.913    1.913
 cp_fm_redistribute_end              50 14.0    1.577    1.888    1.580    1.889
 cp_fm_diag_elpa_base                50 14.0    0.299    1.833    0.307    1.863
 pw_transfer                       1439 11.6    0.063    0.067    1.782    1.810
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.709    1.747
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.008    1.693    1.722
 prepare_preconditioner              11  7.9    0.000    0.000    1.605    1.632
 make_preconditioner                 11  8.9    0.000    0.000    1.605    1.632
 make_images_data                  4572 15.5    0.045    0.052    1.166    1.580
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.499    1.560
 acc_transpose_blocks             27432 15.5    0.108    0.112    1.188    1.483
 hybrid_alltoall_any               4725 16.4    0.051    0.111    1.011    1.477
 wfi_extrapolate                     11  7.9    0.001    0.001    1.473    1.473
 mp_alltoall_d11v                  2130 13.8    1.233    1.434    1.233    1.434
 fft3d_ps                          1201 14.6    0.497    0.549    1.400    1.426
 mp_allgather_i34                  2286 14.5    0.590    1.375    0.590    1.375
 fft_wrap_pw1pw2_140                487 13.2    0.076    0.084    1.304    1.334
 grid_collocate_task_list           119  9.7    1.240    1.313    1.240    1.313
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.262    1.270
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.187    1.234
 mp_sum_d                          4129 12.0    0.560    1.000    0.560    1.000
 qs_energies_init_hamiltonians       11  5.9    0.003    0.009    0.945    0.964
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.917    0.931
 acc_transpose_blocks_kernels     27432 16.5    0.183    0.273    0.655    0.864
 make_images_sizes                 4572 15.5    0.005    0.005    0.583    0.829
 mp_alltoall_i44                   4572 16.5    0.578    0.825    0.578    0.825
 rs_pw_transfer_PW2RS_50            119 14.3    0.587    0.606    0.766    0.812
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=38.529000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=463.818182, yerr=1.465865
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             522.076160E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62660.
 MP_Allreduce        10225                    303.
 MP_Sync               104
 MP_Alltoall          1821                1607811.
 MP_ISendRecv        22134                  57667.
 MP_Wait             33054
 MP_comm_split          50
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.125    0.222   34.264   34.269
 qs_mol_dyn_low                       1  2.0    0.006    0.008   33.112   33.119
 qs_forces                           11  3.9    0.033    0.040   33.052   33.055
 qs_energies                         11  4.9    0.012    0.049   31.367   31.401
 scf_env_do_scf                      11  5.9    0.001    0.002   25.436   25.437
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   22.843   22.845
 dbcsr_multiply_generic            2286 12.5    0.108    0.121   16.642   16.752
 velocity_verlet                     10  3.0    0.001    0.002   15.183   15.184
 qs_scf_new_mos                     108  7.5    0.001    0.001   14.788   14.804
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   14.788   14.803
 ot_scf_mini                        108  9.5    0.003    0.003   14.062   14.076
 multiply_cannon                   2286 13.5    0.193    0.200   13.102   13.870
 multiply_cannon_loop              2286 14.5    0.640    0.667   12.285   13.073
 ot_mini                            108 10.5    0.001    0.001    8.790    8.803
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.295    7.311
 multiply_cannon_multrec          18288 15.5    1.931    2.834    6.776    7.077
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.467    6.485
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.014    6.467    6.485
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.671    5.687
 dbcsr_mm_accdrv_process          38222 16.0    4.033    5.285    4.763    5.557
 init_scf_run                        11  5.9    0.000    0.001    4.532    4.532
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    4.532    4.532
 mp_waitall_1                    169478 16.3    3.064    4.187    3.064    4.187
 sum_up_and_integrate               119 10.3    0.030    0.031    4.042    4.063
 integrate_v_rspace                 119 11.3    0.002    0.003    4.011    4.037
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.727    3.402
 calculate_first_density_matrix       1  7.0    0.001    0.002    3.316    3.319
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.284    3.293
 calculate_rho_elec                 119  8.7    0.030    0.031    3.283    3.292
 qs_ot_get_p                        119 10.4    0.001    0.002    3.106    3.132
 init_scf_loop                       11  6.9    0.001    0.003    2.575    2.580
 rs_pw_transfer                     974 11.9    0.009    0.010    2.380    2.485
 multiply_cannon_metrocomm3       18288 15.5    0.045    0.046    1.443    2.429
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.084    2.422
 apply_single                       119 13.6    0.000    0.000    2.083    2.422
 make_m2s                          4572 13.5    0.044    0.046    1.941    2.096
 qs_ot_p2m_diag                      50 11.0    0.012    0.012    2.014    2.020
 make_images                       4572 14.5    0.190    0.201    1.855    2.009
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.983    1.986
 density_rs2pw                      119  9.7    0.004    0.004    1.867    1.966
 grid_integrate_task_list           119 12.3    1.797    1.875    1.797    1.875
 jit_kernel_multiply                 10 15.9    0.678    1.867    0.678    1.867
 potential_pw2rs                    119 12.3    0.007    0.008    1.818    1.845
 calculate_dm_sparse                119  9.5    0.000    0.000    1.821    1.829
 pw_transfer                       1439 11.6    0.063    0.065    1.795    1.818
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.771    1.798
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.751    1.752
 prepare_preconditioner              11  7.9    0.000    0.000    1.737    1.740
 make_preconditioner                 11  8.9    0.000    0.001    1.737    1.740
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.706    1.728
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.588    1.672
 mp_sum_l                          7207 12.9    1.213    1.628    1.213    1.628
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.571    1.579
 multiply_cannon_sync_h2d         18288 15.5    1.385    1.573    1.385    1.573
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.511    1.512
 cp_fm_redistribute_end              50 14.0    1.121    1.488    1.122    1.489
 ot_diis_step                       108 11.5    0.011    0.011    1.475    1.475
 cp_fm_diag_elpa_base                50 14.0    0.351    1.425    0.365    1.462
 fft3d_ps                          1201 14.6    0.506    0.524    1.393    1.415
 fft_wrap_pw1pw2_140                487 13.2    0.086    0.091    1.352    1.376
 grid_collocate_task_list           119  9.7    1.203    1.301    1.203    1.301
 acc_transpose_blocks             18288 15.5    0.075    0.077    1.225    1.245
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.001    1.188    1.192
 wfi_extrapolate                     11  7.9    0.001    0.001    1.167    1.168
 multiply_cannon_metrocomm1       18288 15.5    0.029    0.030    0.416    1.142
 qs_energies_init_hamiltonians       11  5.9    0.034    0.051    1.056    1.089
 make_images_data                  4572 15.5    0.045    0.049    0.823    1.004
 parallel_gemm_fm                    81  9.0    0.000    0.000    0.917    0.920
 parallel_gemm_fm_cosma              81 10.0    0.917    0.920    0.917    0.920
 hybrid_alltoall_any               4725 16.4    0.055    0.112    0.700    0.903
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.878    0.900
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.866    0.870
 mp_alltoall_d11v                  2130 13.8    0.730    0.850    0.730    0.850
 make_basis_sm                       11  9.8    0.000    0.000    0.847    0.849
 mp_alltoall_z22v                  1201 16.6    0.728    0.799    0.728    0.799
 acc_transpose_blocks_kernels     18288 16.5    0.212    0.219    0.784    0.796
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.663    0.747
 cp_fm_cholesky_invert               11 10.9    0.739    0.743    0.739    0.743
 mp_sum_d                          4129 12.0    0.531    0.736    0.531    0.736
 dbcsr_complete_redistribute        329 12.2    0.105    0.146    0.542    0.696
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=34.269000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=495.818182, yerr=1.898238
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             556.445696E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62659.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                2412273.
 MP_ISendRecv        16422                  74133.
 MP_Wait             24482
 MP_comm_split          50
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.049    0.100   36.447   36.448
 qs_mol_dyn_low                       1  2.0    0.003    0.003   35.921   35.929
 qs_forces                           11  3.9    0.002    0.002   35.863   35.863
 qs_energies                         11  4.9    0.001    0.003   34.115   34.122
 scf_env_do_scf                      11  5.9    0.001    0.001   28.986   28.988
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   25.312   25.313
 dbcsr_multiply_generic            2286 12.5    0.100    0.102   18.620   18.798
 velocity_verlet                     10  3.0    0.001    0.002   18.365   18.367
 qs_scf_new_mos                     108  7.5    0.001    0.001   16.929   16.986
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   16.929   16.985
 ot_scf_mini                        108  9.5    0.002    0.003   15.972   16.027
 multiply_cannon                   2286 13.5    0.229    0.270   14.564   15.129
 multiply_cannon_loop              2286 14.5    0.939    0.967   13.530   13.939
 ot_mini                            108 10.5    0.001    0.001    9.782    9.851
 multiply_cannon_multrec          27432 15.5    2.351    3.042    8.608    8.963
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.947    8.004
 dbcsr_mm_accdrv_process          47916 15.9    5.277    6.929    6.164    7.382
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.665    6.727
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.664    6.727
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.924    5.980
 sum_up_and_integrate               119 10.3    0.035    0.038    3.865    3.873
 integrate_v_rspace                 119 11.3    0.002    0.003    3.830    3.839
 init_scf_run                        11  5.9    0.000    0.001    3.742    3.743
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.742    3.742
 init_scf_loop                       11  6.9    0.001    0.004    3.647    3.648
 qs_ot_get_p                        119 10.4    0.001    0.001    3.505    3.589
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.440    3.473
 calculate_rho_elec                 119  8.7    0.040    0.046    3.440    3.473
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.802    3.288
 mp_waitall_1                    145218 16.4    2.506    3.100    2.506    3.100
 prepare_preconditioner              11  7.9    0.000    0.000    2.726    2.735
 make_preconditioner                 11  8.9    0.000    0.001    2.726    2.735
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.331    2.657
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.136    2.589
 apply_single                       119 13.6    0.000    0.000    2.136    2.589
 make_m2s                          4572 13.5    0.054    0.055    2.462    2.548
 make_images                       4572 14.5    0.273    0.332    2.354    2.438
 rs_pw_transfer                     974 11.9    0.009    0.009    2.242    2.355
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.303    2.305
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    2.110    2.122
 calculate_dm_sparse                119  9.5    0.000    0.000    2.046    2.103
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.041    2.074
 density_rs2pw                      119  9.7    0.003    0.004    1.934    2.061
 grid_integrate_task_list           119 12.3    1.824    1.913    1.824    1.913
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.881    1.882
 pw_transfer                       1439 11.6    0.063    0.067    1.781    1.810
 ot_diis_step                       108 11.5    0.012    0.012    1.796    1.796
 mp_sum_l                          7207 12.9    1.224    1.782    1.224    1.782
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.781    1.782
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.692    1.723
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.647    1.663
 potential_pw2rs                    119 12.3    0.009    0.009    1.647    1.653
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.490    1.491
 jit_kernel_multiply                  8 15.7    0.826    1.483    0.826    1.483
 acc_transpose_blocks             27432 15.5    0.113    0.115    1.451    1.471
 cp_fm_redistribute_end              50 14.0    0.983    1.458    0.985    1.459
 cp_fm_diag_elpa_base                50 14.0    0.450    1.378    0.470    1.417
 multiply_cannon_metrocomm3       27432 15.5    0.038    0.039    0.837    1.405
 fft3d_ps                          1201 14.6    0.530    0.580    1.373    1.398
 wfi_extrapolate                     11  7.9    0.001    0.001    1.382    1.382
 fft_wrap_pw1pw2_140                487 13.2    0.084    0.092    1.317    1.349
 grid_collocate_task_list           119  9.7    1.220    1.326    1.220    1.326
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.001    1.215    1.228
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.153    1.175
 dbcsr_complete_redistribute        329 12.2    0.126    0.179    0.890    1.162
 cp_fm_upper_to_full                 72 13.5    0.810    1.120    0.810    1.120
 qs_energies_init_hamiltonians       11  5.9    0.015    0.022    1.107    1.108
 make_images_data                  4572 15.5    0.045    0.049    0.948    1.065
 multiply_cannon_sync_h2d         27432 15.5    0.976    1.035    0.976    1.035
 hybrid_alltoall_any               4725 16.4    0.062    0.150    0.808    0.982
 mp_alltoall_d11v                  2130 13.8    0.819    0.919    0.819    0.919
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.632    0.901
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.796    0.873
 cp_fm_cholesky_invert               11 10.9    0.842    0.847    0.842    0.847
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.834    0.839
 acc_transpose_blocks_kernels     27432 16.5    0.271    0.280    0.823    0.831
 mp_alltoall_z22v                  1201 16.6    0.726    0.751    0.726    0.751
 mp_alltoall_i22                    627 13.8    0.437    0.735    0.437    0.735
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=36.448000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=527.272727, yerr=4.069642
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             598.585344E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                3682667.
 MP_ISendRecv        10710                  94533.
 MP_Wait             16690
 MP_comm_split          50
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.067    0.088   29.532   29.532
 qs_mol_dyn_low                       1  2.0    0.009    0.011   28.902   28.910
 qs_forces                           11  3.9    0.002    0.005   28.828   28.833
 qs_energies                         11  4.9    0.001    0.001   27.130   27.138
 scf_env_do_scf                      11  5.9    0.001    0.017   21.595   21.595
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   18.980   18.981
 velocity_verlet                     10  3.0    0.017    0.017   14.391   14.394
 dbcsr_multiply_generic            2286 12.5    0.093    0.095   12.611   12.730
 qs_scf_new_mos                     108  7.5    0.001    0.001   11.344   11.371
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   11.343   11.370
 ot_scf_mini                        108  9.5    0.002    0.002   10.657   10.682
 multiply_cannon                   2286 13.5    0.230    0.239    9.990   10.325
 multiply_cannon_loop              2286 14.5    0.332    0.342    9.062    9.264
 multiply_cannon_multrec           9144 15.5    1.672    1.902    6.022    6.258
 ot_mini                            108 10.5    0.001    0.001    6.015    6.044
 rebuild_ks_matrix                  119  8.3    0.000    0.000    5.821    5.844
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    5.821    5.844
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.191    5.211
 qs_ot_get_derivative               108 11.5    0.001    0.001    4.708    4.731
 dbcsr_mm_accdrv_process          12550 15.8    3.253    4.183    4.251    4.323
 init_scf_run                        11  5.9    0.000    0.001    3.924    3.924
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.924    3.924
 sum_up_and_integrate               119 10.3    0.037    0.041    3.565    3.569
 integrate_v_rspace                 119 11.3    0.003    0.003    3.527    3.532
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.273    3.280
 calculate_rho_elec                 119  8.7    0.059    0.061    3.272    3.279
 qs_ot_get_p                        119 10.4    0.001    0.001    2.859    2.899
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.788    2.788
 init_scf_loop                       11  6.9    0.000    0.000    2.579    2.581
 mp_waitall_1                    121218 16.5    1.832    2.344    1.832    2.344
 make_m2s                          4572 13.5    0.034    0.035    1.815    1.959
 grid_integrate_task_list           119 12.3    1.847    1.939    1.847    1.939
 jit_kernel_multiply                 10 15.9    0.960    1.932    0.960    1.932
 make_images                       4572 14.5    0.267    0.302    1.726    1.869
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    1.857    1.861
 calculate_dm_sparse                119  9.5    0.000    0.000    1.822    1.842
 prepare_preconditioner              11  7.9    0.000    0.000    1.818    1.821
 make_preconditioner                 11  8.9    0.000    0.000    1.818    1.821
 rs_pw_transfer                     974 11.9    0.008    0.008    1.688    1.807
 density_rs2pw                      119  9.7    0.004    0.004    1.673    1.783
 pw_transfer                       1439 11.6    0.063    0.066    1.744    1.753
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.701    1.725
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.718    1.720
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.660    1.662
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.653    1.662
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.602    1.614
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.481    1.489
 qs_energies_init_hamiltonians       11  5.9    0.092    0.129    1.381    1.387
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.365    1.366
 grid_collocate_task_list           119  9.7    1.271    1.360    1.271    1.360
 potential_pw2rs                    119 12.3    0.010    0.010    1.344    1.347
 cp_fm_redistribute_end              50 14.0    0.680    1.341    0.681    1.342
 fft3d_ps                          1201 14.6    0.539    0.549    1.321    1.331
 fft_wrap_pw1pw2_140                487 13.2    0.082    0.089    1.321    1.329
 cp_fm_diag_elpa_base                50 14.0    0.616    1.268    0.659    1.323
 ot_diis_step                       108 11.5    0.013    0.013    1.293    1.293
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.232    1.237
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.198    1.210
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.165    1.183
 apply_single                       119 13.6    0.000    0.000    1.165    1.183
 wfi_extrapolate                     11  7.9    0.001    0.001    1.091    1.091
 hybrid_alltoall_any               4725 16.4    0.062    0.175    0.825    1.056
 make_images_data                  4572 15.5    0.039    0.043    0.859    1.039
 acc_transpose_blocks              9144 15.5    0.039    0.040    1.011    1.017
 mp_alltoall_d11v                  2130 13.8    0.840    0.963    0.840    0.963
 cp_fm_cholesky_invert               11 10.9    0.920    0.923    0.920    0.923
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.869    0.918
 multiply_cannon_sync_h2d          9144 15.5    0.713    0.780    0.713    0.780
 acc_transpose_blocks_kernels      9144 16.5    0.118    0.121    0.770    0.772
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    0.707    0.757
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.753    0.755
 parallel_gemm_fm                    81  9.0    0.000    0.000    0.750    0.751
 parallel_gemm_fm_cosma              81 10.0    0.750    0.751    0.750    0.751
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.740    0.749
 multiply_cannon_metrocomm3        9144 15.5    0.019    0.019    0.385    0.725
 make_basis_sm                       11  9.8    0.000    0.000    0.713    0.714
 mp_allgather_i34                  2286 14.5    0.260    0.702    0.260    0.702
 mp_alltoall_z22v                  1201 16.6    0.656    0.695    0.656    0.695
 jit_kernel_transpose                 5 15.6    0.652    0.655    0.652    0.655
 dbcsr_complete_redistribute        329 12.2    0.201    0.245    0.617    0.645
 qs_create_task_list                 11  7.9    0.036    0.036    0.581    0.605
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=29.532000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=567.454545, yerr=4.075729
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             755.159040E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63729.
 MP_Allreduce        10074                    433.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_ISendRecv         4998                 189067.
 MP_Wait              8898
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.079    0.093   41.460   41.461
 qs_mol_dyn_low                       1  2.0    0.054    0.085   40.851   40.858
 qs_forces                           11  3.9    0.003    0.006   40.742   40.753
 qs_energies                         11  4.9    0.001    0.001   38.790   38.803
 scf_env_do_scf                      11  5.9    0.001    0.001   32.928   32.928
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.006   24.980   24.981
 velocity_verlet                     10  3.0    0.002    0.002   22.988   22.993
 dbcsr_multiply_generic            2286 12.5    0.101    0.101   17.655   17.722
 qs_scf_new_mos                     108  7.5    0.001    0.001   16.065   16.159
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   16.065   16.158
 ot_scf_mini                        108  9.5    0.002    0.002   14.985   15.080
 multiply_cannon                   2286 13.5    0.308    0.318   13.711   14.533
 multiply_cannon_loop              2286 14.5    0.343    0.350   12.407   13.245
 ot_mini                            108 10.5    0.001    0.001    9.023    9.135
 multiply_cannon_multrec           9144 15.5    3.433    4.804    8.567    8.637
 init_scf_loop                       11  6.9    0.000    0.000    7.919    7.922
 qs_ot_get_derivative               108 11.5    0.001    0.001    6.993    7.089
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.904    7.041
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    6.903    7.041
 prepare_preconditioner              11  7.9    0.000    0.000    6.941    6.954
 make_preconditioner                 11  8.9    0.000    0.000    6.940    6.954
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.484    6.831
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.247    6.371
 dbcsr_mm_accdrv_process          12550 15.8    4.094    5.611    5.013    6.350
 cp_fm_upper_to_full                 72 14.2    3.188    4.588    3.188    4.588
 init_scf_run                        11  5.9    0.000    0.001    3.817    3.817
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.817    3.817
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.694    3.702
 calculate_rho_elec                 119  8.7    0.118    0.121    3.694    3.702
 sum_up_and_integrate               119 10.3    0.064    0.066    3.690    3.696
 integrate_v_rspace                 119 11.3    0.003    0.003    3.625    3.632
 mp_waitall_1                     97218 16.6    2.600    3.495    2.600    3.495
 qs_ot_get_p                        119 10.4    0.001    0.001    3.203    3.341
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.446    2.897
 dbcsr_complete_redistribute        329 12.2    0.288    0.296    2.007    2.843
 make_m2s                          4572 13.5    0.037    0.038    2.331    2.551
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.688    2.504
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.223    2.457
 apply_single                       119 13.6    0.000    0.000    2.223    2.457
 make_images                       4572 14.5    0.351    0.381    2.212    2.430
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.392    2.395
 mp_alltoall_i22                    627 13.8    1.493    2.339    1.493    2.339
 calculate_dm_sparse                119  9.5    0.000    0.000    2.246    2.264
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.452    2.261
 multiply_cannon_metrocomm3        9144 15.5    0.019    0.020    1.327    2.140
 grid_integrate_task_list           119 12.3    2.021    2.047    2.021    2.047
 pw_transfer                       1439 11.6    0.066    0.067    2.041    2.045
 ot_diis_step                       108 11.5    0.014    0.015    2.006    2.007
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.945    1.950
 qs_ot_p2m_diag                      50 11.0    0.042    0.043    1.934    1.936
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.928    1.929
 density_rs2pw                      119  9.7    0.003    0.003    1.783    1.803
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.776    1.787
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.697    1.748
 mp_sum_l                          7207 12.9    1.069    1.740    1.069    1.740
 jit_kernel_multiply                 10 15.5    0.893    1.724    0.893    1.724
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.661    1.661
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.598    1.624
 fft_wrap_pw1pw2_140                487 13.2    0.087    0.089    1.618    1.624
 fft3d_ps                          1201 14.6    0.567    0.578    1.580    1.583
 cp_fm_cholesky_invert               11 10.9    1.505    1.509    1.505    1.509
 grid_collocate_task_list           119  9.7    1.457    1.469    1.457    1.469
 rs_pw_transfer                     974 11.9    0.009    0.009    1.418    1.447
 hybrid_alltoall_any               4725 16.4    0.087    0.146    1.169    1.442
 make_images_data                  4572 15.5    0.043    0.046    1.113    1.374
 wfi_extrapolate                     11  7.9    0.001    0.001    1.369    1.369
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.359    1.360
 cp_fm_diag_elpa_base                50 14.0    1.213    1.268    1.357    1.357
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.315    1.323
 potential_pw2rs                    119 12.3    0.014    0.015    1.221    1.225
 mp_alltoall_d11v                  2130 13.8    1.169    1.189    1.169    1.189
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.150    1.168
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    1.113    1.139
 multiply_cannon_sync_h2d          9144 15.5    1.040    1.045    1.040    1.045
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.990    1.042
 acc_transpose_blocks              9144 15.5    0.039    0.039    0.973    0.979
 qs_create_task_list                 11  7.9    0.001    0.001    0.941    0.951
 generate_qs_task_list               11  8.9    0.372    0.391    0.941    0.950
 mp_alltoall_z22v                  1201 16.6    0.877    0.894    0.877    0.894
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.869    0.882
 copy_dbcsr_to_fm                   153 11.3    0.002    0.002    0.787    0.836
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=41.461000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=710.727273, yerr=11.378536
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             501.358592E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65372.
 MP_Allreduce         9840                    486.
 MP_Sync               100
 MP_Alltoall          1938                1441588.
 MP_ISendRecv        41800                   9096.
 MP_Wait             58168
 MP_comm_split          48
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.027   83.387   83.388
 qs_mol_dyn_low                       1  2.0    0.003    0.004   82.942   82.952
 qs_forces                           11  3.9    0.038    0.052   82.703   82.703
 qs_energies                         11  4.9    0.001    0.002   79.873   79.917
 scf_env_do_scf                      11  5.9    0.000    0.001   70.988   70.991
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   65.436   65.437
 dbcsr_multiply_generic            2055 12.4    0.105    0.108   52.490   52.710
 qs_scf_new_mos                      99  7.5    0.000    0.001   48.525   48.651
 qs_scf_loop_do_ot                   99  8.5    0.000    0.001   48.524   48.650
 ot_scf_mini                         99  9.5    0.002    0.002   46.061   46.136
 multiply_cannon                   2055 13.4    0.174    0.178   42.650   43.358
 velocity_verlet                     10  3.0    0.011    0.016   42.634   42.637
 multiply_cannon_loop              2055 14.4    1.571    1.611   41.653   42.307
 ot_mini                             99 10.5    0.001    0.001   27.960   28.037
 qs_ot_get_derivative                99 11.5    0.001    0.001   21.117   21.196
 multiply_cannon_multrec          49320 15.4   12.026   12.713   17.090   17.825
 rebuild_ks_matrix                  110  8.3    0.000    0.001   14.385   14.478
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.012   14.384   14.477
 mp_waitall_1                    241148 16.1   11.983   12.821   11.983   12.821
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.649   12.725
 multiply_cannon_sync_h2d         49320 15.4    9.884   10.503    9.884   10.503
 qs_ot_get_p                        110 10.4    0.001    0.001    9.439    9.536
 multiply_cannon_metrocomm3       49320 15.4    0.076    0.080    7.145    8.175
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    7.228    7.775
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.160    7.676
 apply_single                       110 13.6    0.000    0.000    7.160    7.675
 sum_up_and_integrate               110 10.3    0.036    0.043    6.998    7.012
 integrate_v_rspace                 110 11.3    0.002    0.003    6.962    6.984
 init_scf_run                        11  5.9    0.000    0.001    6.868    6.869
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    6.868    6.869
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    6.686    6.722
 ot_diis_step                        99 11.5    0.006    0.006    6.526    6.527
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.223    6.355
 calculate_rho_elec                 110  8.6    0.020    0.024    6.223    6.355
 qs_ot_p2m_diag                      48 11.0    0.012    0.018    6.115    6.143
 mp_sum_l                          6514 12.8    5.482    6.143    5.482    6.143
 init_scf_loop                       11  6.9    0.000    0.000    5.522    5.523
 dbcsr_mm_accdrv_process          87628 16.1    2.125    2.234    4.944    5.252
 cp_dbcsr_syevd                      48 12.0    0.002    0.003    5.079    5.080
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    4.576    4.577
 cp_fm_redistribute_end              48 14.0    3.984    4.548    3.988    4.549
 cp_fm_diag_elpa_base                48 14.0    0.555    4.447    0.558    4.473
 make_m2s                          4110 13.4    0.061    0.065    3.910    4.035
 wfi_extrapolate                     11  7.9    0.001    0.001    4.015    4.016
 rs_pw_transfer                     902 11.9    0.012    0.013    3.773    3.983
 make_images                       4110 14.4    0.178    0.192    3.815    3.944
 calculate_dm_sparse                110  9.5    0.000    0.001    3.829    3.938
 multiply_cannon_metrocomm1       49320 15.4    0.060    0.064    2.712    3.880
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.610    3.613
 density_rs2pw                      110  9.6    0.004    0.004    3.290    3.488
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    3.419    3.466
 grid_integrate_task_list           110 12.3    3.263    3.399    3.263    3.399
 prepare_preconditioner              11  7.9    0.000    0.000    3.376    3.390
 make_preconditioner                 11  8.9    0.000    0.000    3.376    3.390
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.218    3.250
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.158    3.204
 pw_transfer                       1331 11.6    0.054    0.061    2.854    2.917
 fft_wrap_pw1pw2                   1111 12.6    0.007    0.008    2.767    2.832
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.765    2.771
 potential_pw2rs                    110 12.3    0.006    0.007    2.589    2.616
 jit_kernel_multiply                 13 15.9    2.536    2.548    2.536    2.548
 mp_alltoall_d11v                  2046 13.8    2.128    2.543    2.128    2.543
 fft_wrap_pw1pw2_140                451 13.1    0.168    0.187    2.297    2.362
 fft3d_ps                          1111 14.6    0.749    0.836    2.305    2.359
 acc_transpose_blocks             49320 15.4    0.216    0.226    2.173    2.238
 grid_collocate_task_list           110  9.6    2.090    2.199    2.090    2.199
 mp_waitany                       14300 13.8    1.837    2.153    1.837    2.153
 mp_sum_d                          3883 11.9    1.466    1.960    1.466    1.960
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.875    1.896
 make_images_data                  4110 15.4    0.043    0.046    1.764    1.885
 cp_fm_cholesky_invert               11 10.9    1.832    1.836    1.832    1.836
 hybrid_alltoall_any               4261 16.3    0.081    0.482    1.534    1.789
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=83.388000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=476.272727, yerr=2.377581
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             591.151104E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65587.
 MP_Allreduce         9839                    562.
 MP_Sync               100
 MP_Alltoall          1717                1808072.
 MP_ISendRecv        20680                  26400.
 MP_Wait             32692
 MP_comm_split          48
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.192    0.241   70.939   70.939
 qs_mol_dyn_low                       1  2.0    0.004    0.004   70.230   70.261
 qs_forces                           11  3.9    0.002    0.003   70.159   70.161
 qs_energies                         11  4.9    0.001    0.001   66.811   66.816
 scf_env_do_scf                      11  5.9    0.000    0.001   57.905   57.908
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   49.712   49.713
 dbcsr_multiply_generic            2055 12.4    0.114    0.118   38.006   38.158
 velocity_verlet                     10  3.0    0.001    0.002   37.202   37.203
 qs_scf_new_mos                      99  7.5    0.001    0.001   33.376   33.503
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   33.375   33.503
 multiply_cannon                   2055 13.4    0.220    0.243   31.016   32.155
 ot_scf_mini                         99  9.5    0.003    0.003   31.698   31.820
 multiply_cannon_loop              2055 14.4    0.932    0.955   29.627   30.525
 ot_mini                             99 10.5    0.001    0.001   18.643   18.772
 multiply_cannon_multrec          24660 15.4    7.648    9.192   13.869   15.267
 rebuild_ks_matrix                  110  8.3    0.000    0.001   13.906   13.993
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   13.906   13.992
 qs_ot_get_derivative                99 11.5    0.001    0.001   12.833   12.953
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.277   12.352
 mp_waitall_1                    186928 16.3    8.248   10.888    8.248   10.888
 multiply_cannon_sync_h2d         24660 15.4    7.006    8.295    7.006    8.295
 init_scf_loop                       11  6.9    0.000    0.000    8.153    8.154
 multiply_cannon_metrocomm3       24660 15.4    0.071    0.073    5.077    7.499
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.535    7.292
 apply_single                       110 13.6    0.000    0.001    6.535    7.292
 sum_up_and_integrate               110 10.3    0.052    0.059    6.852    6.861
 integrate_v_rspace                 110 11.3    0.002    0.003    6.799    6.813
 qs_ot_get_p                        110 10.4    0.001    0.001    6.380    6.530
 init_scf_run                        11  5.9    0.000    0.001    6.440    6.440
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    6.439    6.440
 dbcsr_mm_accdrv_process          52282 16.1    4.730    5.407    6.062    6.395
 prepare_preconditioner              11  7.9    0.000    0.000    6.103    6.133
 make_preconditioner                 11  8.9    0.000    0.000    6.103    6.133
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.050    6.058
 calculate_rho_elec                 110  8.6    0.039    0.047    6.049    6.058
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.663    5.826
 ot_diis_step                        99 11.5    0.010    0.010    5.762    5.763
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    4.744    5.477
 make_m2s                          4110 13.4    0.056    0.059    4.510    4.950
 make_images                       4110 14.4    0.396    0.439    4.402    4.838
 qs_ot_p2m_diag                      48 11.0    0.028    0.044    4.285    4.306
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.824    3.824
 pw_transfer                       1331 11.6    0.065    0.070    3.504    3.656
 wfi_extrapolate                     11  7.9    0.001    0.001    3.636    3.636
 rs_pw_transfer                     902 11.9    0.012    0.013    3.380    3.550
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.397    3.548
 density_rs2pw                      110  9.6    0.004    0.004    3.359    3.524
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.366    3.433
 grid_integrate_task_list           110 12.3    3.135    3.325    3.135    3.325
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.275    3.277
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.162    3.164
 cp_fm_redistribute_end              48 14.0    2.347    3.119    2.350    3.120
 cp_fm_diag_elpa_base                48 14.0    0.734    2.983    0.765    3.066
 calculate_dm_sparse                110  9.5    0.001    0.001    2.982    3.009
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.890    2.947
 fft3d_ps                          1111 14.6    1.071    1.267    2.758    2.893
 make_images_data                  4110 15.4    0.048    0.051    2.420    2.864
 cp_fm_cholesky_invert               11 10.9    2.843    2.851    2.843    2.851
 fft_wrap_pw1pw2_140                451 13.1    0.197    0.217    2.670    2.815
 hybrid_alltoall_any               4261 16.3    0.102    0.441    2.126    2.786
 potential_pw2rs                    110 12.3    0.008    0.009    2.706    2.742
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.711    2.714
 mp_sum_l                          6514 12.8    1.897    2.608    1.897    2.608
 grid_collocate_task_list           110  9.6    2.121    2.271    2.121    2.271
 mp_alltoall_d11v                  2046 13.8    1.908    2.137    1.908    2.137
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.996    2.016
 qs_energies_init_hamiltonians       11  5.9    0.020    0.033    1.947    1.950
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.795    1.808
 multiply_cannon_metrocomm4       22605 15.4    0.074    0.079    0.784    1.716
 mp_allgather_i34                  2055 14.4    0.718    1.698    0.718    1.698
 jit_kernel_multiply                  8 16.2    0.980    1.617    0.980    1.617
 mp_irecv_dv                      57340 16.2    0.661    1.602    0.661    1.602
 cp_fm_cholesky_decompose            22 10.9    1.559    1.565    1.559    1.565
 dbcsr_complete_redistribute        325 12.2    0.234    0.302    1.272    1.555
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.539    1.549
 acc_transpose_blocks             24660 15.4    0.107    0.110    1.516    1.538
 mp_alltoall_z22v                  1111 16.6    1.396    1.493    1.396    1.493
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.374    1.483
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=70.939000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=556.272727, yerr=8.708313
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             658.571264E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65578.
 MP_Allreduce         9838                    559.
 MP_Sync               100
 MP_Alltoall          1496                4511006.
 MP_ISendRecv        13640                  27424.
 MP_Wait             32318
 MP_comm_split          48
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.040    0.084   60.845   60.846
 qs_mol_dyn_low                       1  2.0    0.003    0.003   60.335   60.344
 qs_forces                           11  3.9    0.036    0.053   60.270   60.271
 qs_energies                         11  4.9    0.001    0.003   57.066   57.103
 scf_env_do_scf                      11  5.9    0.001    0.001   48.917   48.917
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   40.106   40.107
 velocity_verlet                     10  3.0    0.001    0.002   32.778   32.780
 dbcsr_multiply_generic            2055 12.4    0.108    0.112   28.551   28.776
 qs_scf_new_mos                      99  7.5    0.001    0.001   25.201   25.289
 qs_scf_loop_do_ot                   99  8.5    0.001    0.002   25.200   25.289
 ot_scf_mini                         99  9.5    0.003    0.004   23.970   24.081
 multiply_cannon                   2055 13.4    0.210    0.220   22.106   23.295
 multiply_cannon_loop              2055 14.4    0.621    0.637   20.861   21.874
 ot_mini                             99 10.5    0.001    0.001   13.799   13.913
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.502   12.609
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   12.501   12.608
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.067   11.162
 multiply_cannon_multrec          16440 15.4    3.951    5.269    9.704   10.917
 mp_waitall_1                    146766 16.3    7.289   10.504    7.289   10.504
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.247    9.359
 init_scf_loop                       11  6.9    0.000    0.002    8.775    8.777
 multiply_cannon_metrocomm3       16440 15.4    0.043    0.044    4.187    7.282
 prepare_preconditioner              11  7.9    0.000    0.000    6.962    6.976
 make_preconditioner                 11  8.9    0.000    0.001    6.962    6.976
 sum_up_and_integrate               110 10.3    0.060    0.061    6.720    6.736
 integrate_v_rspace                 110 11.3    0.003    0.003    6.660    6.677
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.305    6.664
 dbcsr_mm_accdrv_process          34862 16.1    4.665    5.367    5.606    5.762
 init_scf_run                        11  5.9    0.000    0.001    5.728    5.729
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.728    5.728
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.638    5.648
 calculate_rho_elec                 110  8.6    0.058    0.058    5.638    5.648
 qs_ot_get_p                        110 10.4    0.001    0.001    5.404    5.543
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    5.019    5.452
 apply_single                       110 13.6    0.000    0.000    5.019    5.451
 make_m2s                          4110 13.4    0.050    0.051    4.330    4.707
 make_images                       4110 14.4    0.390    0.512    4.215    4.595
 ot_diis_step                        99 11.5    0.011    0.011    4.524    4.525
 multiply_cannon_sync_h2d         16440 15.4    3.722    4.384    3.722    4.384
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.147    3.778
 qs_ot_p2m_diag                      48 11.0    0.042    0.044    3.677    3.680
 grid_integrate_task_list           110 12.3    3.198    3.400    3.198    3.400
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.328    3.328
 pw_transfer                       1331 11.6    0.064    0.070    3.284    3.294
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.178    3.190
 density_rs2pw                      110  9.6    0.004    0.004    2.913    3.098
 rs_pw_transfer                     902 11.9    0.010    0.011    2.819    2.992
 make_images_data                  4110 15.4    0.044    0.049    2.484    2.982
 wfi_extrapolate                     11  7.9    0.001    0.001    2.972    2.972
 hybrid_alltoall_any               4261 16.3    0.105    0.371    2.217    2.937
 cp_fm_cholesky_invert               11 10.9    2.776    2.783    2.776    2.783
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.775    2.779
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.756    2.758
 cp_fm_redistribute_end              48 14.0    1.726    2.747    1.728    2.747
 cp_fm_diag_elpa_base                48 14.0    0.955    2.597    1.013    2.705
 fft_wrap_pw1pw2_140                451 13.1    0.208    0.212    2.670    2.682
 calculate_first_density_matrix       1  7.0    0.000    0.003    2.673    2.674
 calculate_dm_sparse                110  9.5    0.001    0.001    2.510    2.538
 fft3d_ps                          1111 14.6    1.063    1.075    2.493    2.509
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.430    2.469
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.398    2.457
 multiply_cannon_metrocomm4       14385 15.4    0.045    0.049    0.857    2.384
 grid_collocate_task_list           110  9.6    2.174    2.375    2.174    2.375
 potential_pw2rs                    110 12.3    0.011    0.011    2.344    2.361
 mp_alltoall_d11v                  2046 13.8    2.015    2.344    2.015    2.344
 mp_irecv_dv                      48980 15.7    0.788    2.261    0.788    2.261
 mp_sum_l                          6514 12.8    1.546    2.206    1.546    2.206
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.974    2.008
 dbcsr_complete_redistribute        325 12.2    0.321    0.346    1.421    1.888
 cp_fm_upper_to_full                 70 13.6    1.390    1.829    1.390    1.829
 cp_fm_cholesky_decompose            22 10.9    1.691    1.730    1.691    1.730
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.635    1.647
 mp_allgather_i34                  2055 14.4    0.571    1.578    0.571    1.578
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.463    1.474
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    1.364    1.469
 jit_kernel_multiply                  8 16.3    0.552    1.469    0.552    1.469
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    0.968    1.427
 rs_gather_matrices                 110 12.3    0.138    0.150    1.064    1.381
 mp_waitany                       17072 13.8    1.151    1.295    1.151    1.295
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.261    1.271
 mp_alltoall_z22v                  1111 16.6    1.217    1.245    1.217    1.245
 acc_transpose_blocks             16440 15.4    0.073    0.076    1.216    1.240
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=60.846000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=623.181818, yerr=7.952337
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             734.347264E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65576.
 MP_Allreduce         9838                    600.
 MP_Sync               100
 MP_Alltoall          1496                5863162.
 MP_ISendRecv        10120                  43184.
 MP_Wait             25102
 MP_comm_split          48
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.028    0.041   67.333   67.334
 qs_mol_dyn_low                       1  2.0    0.003    0.003   66.667   66.677
 qs_forces                           11  3.9    0.002    0.002   66.589   66.590
 qs_energies                         11  4.9    0.005    0.030   63.191   63.196
 scf_env_do_scf                      11  5.9    0.001    0.001   54.772   54.775
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   42.277   42.278
 velocity_verlet                     10  3.0    0.001    0.002   38.086   38.088
 dbcsr_multiply_generic            2055 12.4    0.116    0.119   29.908   30.126
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.476   27.585
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   27.476   27.585
 ot_scf_mini                         99  9.5    0.003    0.003   25.829   25.947
 multiply_cannon                   2055 13.4    0.242    0.261   22.529   23.719
 multiply_cannon_loop              2055 14.4    0.883    0.907   20.997   21.648
 ot_mini                             99 10.5    0.001    0.001   14.482   14.616
 multiply_cannon_multrec          24660 15.4    4.163    6.800   12.600   13.845
 init_scf_loop                       11  6.9    0.001    0.003   12.442   12.443
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.123   12.219
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.014   12.123   12.219
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.747   10.830
 prepare_preconditioner              11  7.9    0.000    0.000   10.675   10.691
 make_preconditioner                 11  8.9    0.000    0.002   10.675   10.691
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.268   10.396
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.862   10.353
 dbcsr_mm_accdrv_process          52304 16.0    6.883    8.737    8.292    9.229
 mp_waitall_1                    126806 16.4    4.911    6.897    4.911    6.897
 sum_up_and_integrate               110 10.3    0.068    0.071    6.500    6.513
 integrate_v_rspace                 110 11.3    0.003    0.003    6.432    6.445
 qs_ot_get_p                        110 10.4    0.001    0.001    6.259    6.423
 make_m2s                          4110 13.4    0.059    0.062    5.654    5.965
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.876    5.885
 calculate_rho_elec                 110  8.6    0.077    0.081    5.875    5.884
 make_images                       4110 14.4    0.575    0.690    5.513    5.820
 init_scf_run                        11  5.9    0.000    0.001    5.594    5.595
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.594    5.595
 cp_fm_upper_to_full                 70 13.8    3.397    4.813    3.397    4.813
 qs_ot_p2m_diag                      48 11.0    0.054    0.063    4.276    4.292
 ot_diis_step                        99 11.5    0.011    0.012    4.171    4.172
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.063    4.169
 apply_single                       110 13.6    0.000    0.000    4.063    4.168
 dbcsr_complete_redistribute        325 12.2    0.463    0.508    2.884    4.057
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.772    3.774
 multiply_cannon_metrocomm3       24660 15.4    0.036    0.037    1.521    3.462
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.402    3.452
 grid_integrate_task_list           110 12.3    3.283    3.439    3.283    3.439
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    2.271    3.437
 pw_transfer                       1331 11.6    0.065    0.072    3.398    3.430
 multiply_cannon_sync_h2d         24660 15.4    3.176    3.377    3.176    3.377
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.292    3.327
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.175    3.250
 make_images_data                  4110 15.4    0.046    0.050    2.878    3.194
 density_rs2pw                      110  9.6    0.004    0.004    3.020    3.185
 hybrid_alltoall_any               4261 16.3    0.120    0.457    2.445    3.147
 cp_fm_cholesky_invert               11 10.9    3.123    3.134    3.123    3.134
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.128    3.131
 cp_fm_redistribute_end              48 14.0    1.560    3.089    1.561    3.090
 wfi_extrapolate                     11  7.9    0.001    0.001    3.082    3.082
 cp_fm_diag_elpa_base                48 14.0    1.443    2.942    1.524    3.055
 calculate_dm_sparse                110  9.5    0.001    0.001    2.936    2.970
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.804    2.955
 mp_alltoall_i22                    605 13.7    1.708    2.953    1.708    2.953
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.870    2.872
 rs_pw_transfer                     902 11.9    0.010    0.010    2.692    2.867
 fft_wrap_pw1pw2_140                451 13.1    0.201    0.213    2.731    2.768
 fft3d_ps                          1111 14.6    1.064    1.105    2.596    2.617
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.413    2.453
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.423    2.427
 grid_collocate_task_list           110  9.6    2.219    2.374    2.219    2.374
 potential_pw2rs                    110 12.3    0.012    0.013    2.263    2.281
 qs_energies_init_hamiltonians       11  5.9    0.025    0.028    2.273    2.275
 mp_alltoall_d11v                  2046 13.8    1.904    2.155    1.904    2.155
 jit_kernel_multiply                 11 15.6    1.080    1.980    1.080    1.980
 cp_fm_cholesky_decompose            22 10.9    1.877    1.920    1.877    1.920
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.755    1.784
 mp_allgather_i34                  2055 14.4    0.639    1.753    0.639    1.753
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.603    1.705
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.624    1.636
 mp_sum_l                          6514 12.8    1.039    1.571    1.039    1.571
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.531    1.546
 acc_transpose_blocks             24660 15.4    0.106    0.107    1.500    1.516
 multiply_cannon_metrocomm4       20550 15.4    0.057    0.060    0.852    1.422
 mp_alltoall_z22v                  1111 16.6    1.316    1.383    1.316    1.383
 mp_irecv_dv                      62702 16.1    0.753    1.353    0.753    1.353
 mp_waitany                       13376 13.8    1.094    1.349    1.094    1.349
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=67.334000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=694.090909, yerr=11.040871
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             836.612096E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65574.
 MP_Allreduce         9838                    640.
 MP_Sync               100
 MP_Alltoall          1496                8504061.
 MP_ISendRecv         6600                  54848.
 MP_Wait             17226
 MP_comm_split          48
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.190    0.225   56.931   56.932
 qs_mol_dyn_low                       1  2.0    0.003    0.003   56.135   56.197
 qs_forces                           11  3.9    0.020    0.021   56.066   56.066
 qs_energies                         11  4.9    0.001    0.001   52.407   52.410
 scf_env_do_scf                      11  5.9    0.000    0.001   43.838   43.838
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   35.537   35.538
 velocity_verlet                     10  3.0    0.002    0.002   31.512   31.515
 dbcsr_multiply_generic            2055 12.4    0.104    0.107   23.180   23.320
 qs_scf_new_mos                      99  7.5    0.001    0.001   21.270   21.352
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   21.269   21.351
 ot_scf_mini                         99  9.5    0.002    0.002   20.017   20.096
 multiply_cannon                   2055 13.4    0.252    0.273   17.487   18.806
 multiply_cannon_loop              2055 14.4    0.324    0.338   16.133   16.384
 rebuild_ks_matrix                  110  8.3    0.000    0.000   11.611   11.659
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   11.611   11.659
 ot_mini                             99 10.5    0.001    0.001   10.666   10.742
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.353   10.396
 multiply_cannon_multrec           8220 15.4    3.246    4.821    7.457    8.601
 mp_waitall_1                    106626 16.5    6.534    8.518    6.534    8.518
 init_scf_loop                       11  6.9    0.000    0.000    8.252    8.254
 qs_ot_get_derivative                99 11.5    0.001    0.001    6.822    6.905
 prepare_preconditioner              11  7.9    0.000    0.000    6.548    6.558
 make_preconditioner                 11  8.9    0.000    0.000    6.548    6.558
 sum_up_and_integrate               110 10.3    0.079    0.081    6.270    6.286
 integrate_v_rspace                 110 11.3    0.003    0.003    6.191    6.205
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.114    6.188
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.720    5.732
 calculate_rho_elec                 110  8.6    0.115    0.115    5.719    5.731
 qs_ot_get_p                        110 10.4    0.001    0.001    5.363    5.443
 init_scf_run                        11  5.9    0.000    0.001    5.344    5.344
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.344    5.344
 dbcsr_mm_accdrv_process          17442 15.9    2.838    3.944    4.080    4.971
 multiply_cannon_metrocomm3        8220 15.4    0.017    0.017    3.146    4.790
 make_m2s                          4110 13.4    0.038    0.040    4.326    4.597
 make_images                       4110 14.4    0.636    0.688    4.196    4.466
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.898    3.902
 ot_diis_step                        99 11.5    0.012    0.014    3.819    3.819
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.734    3.773
 apply_single                       110 13.6    0.000    0.000    3.734    3.773
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.566    3.567
 grid_integrate_task_list           110 12.3    3.378    3.557    3.378    3.557
 pw_transfer                       1331 11.6    0.065    0.069    3.337    3.341
 cp_fm_cholesky_invert               11 10.9    3.247    3.251    3.247    3.251
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.230    3.239
 multiply_cannon_sync_h2d          8220 15.4    2.881    3.025    2.881    3.025
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.993    2.994
 cp_fm_redistribute_end              48 14.0    0.769    2.959    0.775    2.959
 cp_fm_diag_elpa_base                48 14.0    1.993    2.746    2.174    2.906
 make_images_data                  4110 15.4    0.038    0.044    2.463    2.898
 density_rs2pw                      110  9.6    0.004    0.004    2.772    2.856
 wfi_extrapolate                     11  7.9    0.001    0.001    2.799    2.799
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    2.767    2.767
 fft_wrap_pw1pw2_140                451 13.1    0.212    0.215    2.735    2.747
 hybrid_alltoall_any               4261 16.3    0.199    0.857    2.367    2.729
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.536    2.538
 calculate_dm_sparse                110  9.5    0.001    0.001    2.487    2.517
 fft3d_ps                          1111 14.6    1.111    1.122    2.489    2.500
 grid_collocate_task_list           110  9.6    2.324    2.494    2.324    2.494
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.457    2.459
 rs_pw_transfer                     902 11.9    0.010    0.010    2.325    2.454
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.160    2.170
 potential_pw2rs                    110 12.3    0.015    0.016    1.994    2.000
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.772    1.995
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    1.958    1.993
 mp_alltoall_d11v                  2046 13.8    1.713    1.943    1.713    1.943
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    1.886    1.930
 cp_fm_cholesky_decompose            22 10.9    1.828    1.849    1.828    1.849
 jit_kernel_multiply                 10 15.8    0.936    1.734    0.936    1.734
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    1.557    1.707
 mp_allgather_i34                  2055 14.4    0.539    1.660    0.539    1.660
 dbcsr_complete_redistribute        325 12.2    0.553    0.576    1.535    1.630
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.593    1.599
 multiply_cannon_metrocomm1        8220 15.4    0.021    0.022    0.860    1.480
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.440    1.449
 multiply_cannon_metrocomm4        6165 15.4    0.017    0.019    0.477    1.379
 mp_irecv_dv                      24056 15.7    0.453    1.336    0.453    1.336
 qs_create_task_list                 11  7.9    0.001    0.001    1.226    1.323
 generate_qs_task_list               11  8.9    0.378    0.447    1.226    1.322
 mp_waitany                        9240 13.8    1.103    1.256    1.103    1.256
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.204    1.256
 mp_alltoall_z22v                  1111 16.6    1.144    1.157    1.144    1.157
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=56.932000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=779.454545, yerr=13.693818
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.315090E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67104.
 MP_Allreduce         9672                    819.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_ISendRecv         4620                 360267.
 MP_Wait              7524
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.159    0.162   87.072   87.073
 qs_mol_dyn_low                       1  2.0    0.034    0.050   86.171   86.179
 qs_forces                           11  3.9    0.002    0.003   85.739   85.756
 qs_energies                         11  4.9    0.001    0.001   81.687   81.705
 scf_env_do_scf                      11  5.9    0.000    0.001   71.851   71.851
 velocity_verlet                     10  3.0    0.002    0.002   55.678   55.726
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.006   42.730   42.732
 init_scf_loop                       11  6.9    0.000    0.000   29.047   29.053
 dbcsr_multiply_generic            2055 12.4    0.120    0.122   28.487   28.564
 prepare_preconditioner              11  7.9    0.000    0.000   27.102   27.108
 make_preconditioner                 11  8.9    0.000    0.000   27.102   27.108
 make_full_inverse_cholesky          11  9.9    0.000    0.000   21.076   26.579
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.350   26.398
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.350   26.397
 ot_scf_mini                         99  9.5    0.002    0.002   24.577   24.611
 multiply_cannon                   2055 13.4    0.350    0.382   21.545   22.272
 multiply_cannon_loop              2055 14.4    0.342    0.344   19.755   20.091
 cp_fm_upper_to_full                 70 14.2   12.972   18.724   12.972   18.724
 ot_mini                             99 10.5    0.001    0.001   13.549   13.585
 rebuild_ks_matrix                  110  8.3    0.000    0.001   13.130   13.172
 qs_ks_build_kohn_sham_matrix       110  9.3    0.014    0.014   13.129   13.172
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.945   11.984
 dbcsr_complete_redistribute        325 12.2    1.024    1.044    7.608   11.093
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.585   10.064
 multiply_cannon_multrec           8220 15.4    4.362    4.592    9.585    9.699
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    6.010    9.448
 mp_alltoall_i22                    605 13.7    5.636    9.133    5.636    9.133
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.004    9.040
 mp_waitall_1                     87304 16.6    7.799    8.679    7.799    8.679
 sum_up_and_integrate               110 10.3    0.151    0.152    6.635    6.648
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.484    6.520
 calculate_rho_elec                 110  8.6    0.227    0.227    6.483    6.519
 integrate_v_rspace                 110 11.3    0.004    0.004    6.484    6.497
 qs_ot_get_p                        110 10.4    0.001    0.001    5.830    5.884
 make_m2s                          4110 13.4    0.043    0.043    5.193    5.700
 init_scf_run                        11  5.9    0.000    0.001    5.587    5.587
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.586    5.587
 cp_fm_cholesky_invert               11 10.9    5.529    5.533    5.529    5.533
 make_images                       4110 14.4    0.879    0.927    5.007    5.513
 dbcsr_mm_accdrv_process          11614 15.7    3.301    3.947    5.079    5.329
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.018    4.815    5.163
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.659    5.127
 apply_single                       110 13.6    0.000    0.000    4.659    5.126
 ot_diis_step                        99 11.5    0.015    0.015    4.522    4.523
 qs_ot_p2m_diag                      48 11.0    0.150    0.155    4.277    4.283
 multiply_cannon_sync_h2d          8220 15.4    3.947    3.955    3.947    3.955
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.831    3.832
 pw_transfer                       1331 11.6    0.073    0.074    3.766    3.771
 grid_integrate_task_list           110 12.3    3.665    3.716    3.665    3.716
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.650    3.668
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.009    3.650    3.655
 hybrid_alltoall_any               4261 16.3    0.256    0.552    2.868    3.535
 make_images_data                  4110 15.4    0.042    0.045    2.814    3.486
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.883    3.331
 wfi_extrapolate                     11  7.9    0.001    0.001    3.228    3.228
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.207    3.207
 cp_fm_diag_elpa_base                48 14.0    2.660    2.866    3.205    3.206
 calculate_dm_sparse                110  9.5    0.001    0.001    3.161    3.185
 fft_wrap_pw1pw2_140                451 13.1    0.215    0.216    3.154    3.163
 density_rs2pw                      110  9.6    0.004    0.004    2.984    3.002
 fft3d_ps                          1111 14.6    1.265    1.274    2.859    2.864
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.839    2.844
 grid_collocate_task_list           110  9.6    2.627    2.653    2.627    2.653
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.392    2.414
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.339    2.351
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.252    2.254
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    2.174    2.238
 rs_pw_transfer                     902 11.9    0.010    0.011    2.143    2.219
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.098    2.187
 cp_fm_cholesky_decompose            22 10.9    2.087    2.106    2.087    2.106
 mp_alltoall_d11v                  2046 13.8    2.030    2.071    2.030    2.071
 potential_pw2rs                    110 12.3    0.021    0.021    2.063    2.067
 qs_create_task_list                 11  7.9    0.000    0.000    1.894    1.940
 generate_qs_task_list               11  8.9    0.738    0.790    1.894    1.939
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.792    1.796
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.748    1.772
 jit_kernel_multiply                 10 15.4    1.578    1.759    1.578    1.759
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=87.073000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1184.363636, yerr=56.253117
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             631.758848E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175954870160
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4075                  56898.
 MP_Allreduce        11228                    786.
 MP_Sync               170
 MP_Alltoall          2226                3352577.
 MP_ISendRecv        48640                  18752.
 MP_Wait             66796
 MP_comm_split          83
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.088    0.212  207.636  207.639
 qs_mol_dyn_low                       1  2.0    0.030    0.068  206.545  206.559
 qs_forces                           11  3.9    0.023    0.055  206.409  206.437
 qs_energies                         11  4.9    0.008    0.023  200.874  200.927
 scf_env_do_scf                      11  5.9    0.001    0.021  183.562  183.566
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  162.487  162.489
 dbcsr_multiply_generic            2507 12.6    0.175    0.179  124.864  125.374
 velocity_verlet                     10  3.0    0.006    0.007  124.312  124.313
 qs_scf_new_mos                     117  7.6    0.001    0.001  123.343  123.551
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  123.342  123.551
 ot_scf_mini                        117  9.6    0.003    0.004  116.748  116.901
 multiply_cannon                   2507 13.6    0.239    0.247  101.110  103.556
 multiply_cannon_loop              2507 14.6    2.128    2.204   98.765  100.733
 ot_mini                            117 10.6    0.001    0.001   66.448   66.636
 multiply_cannon_multrec          60168 15.6   33.049   35.006   41.442   43.208
 qs_ot_get_derivative               117 11.6    0.001    0.001   41.648   41.813
 rebuild_ks_matrix                  128  8.3    0.001    0.001   33.409   33.663
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.019   33.408   33.662
 mp_waitall_1                    291448 16.2   29.241   32.476   29.241   32.476
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.074   30.325
 multiply_cannon_sync_h2d         60168 15.6   27.175   28.979   27.175   28.979
 qs_ot_get_p                        128 10.4    0.001    0.001   28.404   28.630
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.193   24.827
 apply_single                       128 13.6    0.001    0.001   24.192   24.827
 ot_diis_step                       117 11.6    0.007    0.008   24.448   24.449
 qs_ot_p2m_diag                      83 11.4    0.077    0.091   21.542   21.621
 init_scf_loop                       11  6.9    0.001    0.004   20.975   20.976
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   19.332   19.490
 multiply_cannon_metrocomm3       60168 15.6    0.110    0.115   15.884   19.474
 cp_dbcsr_syevd                      83 12.4    0.004    0.005   18.849   18.850
 prepare_preconditioner              11  7.9    0.000    0.000   16.366   16.416
 make_preconditioner                 11  8.9    0.000    0.003   16.366   16.416
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   15.813   15.819
 make_full_inverse_cholesky          11  9.9    0.000    0.000   15.589   15.769
 cp_fm_redistribute_end              83 14.4   12.474   15.722   12.487   15.725
 cp_fm_diag_elpa_base                83 14.4    3.188   15.420    3.224   15.539
 make_m2s                          5014 13.6    0.103    0.110   13.929   14.256
 sum_up_and_integrate               128 10.3    0.090    0.110   14.083   14.101
 make_images                       5014 14.6    0.403    0.422   13.749   14.087
 integrate_v_rspace                 128 11.3    0.003    0.004   13.993   14.011
 qs_rho_update_rho_low              128  7.7    0.001    0.001   12.926   13.032
 calculate_rho_elec                 128  8.7    0.045    0.063   12.925   13.031
 init_scf_run                        11  5.9    0.000    0.001   12.686   12.687
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.686   12.687
 mp_sum_l                          7870 13.0    8.599    9.746    8.599    9.746
 cp_fm_cholesky_invert               11 10.9    9.440    9.449    9.440    9.449
 wfi_extrapolate                     11  7.9    0.001    0.001    9.072    9.072
 calculate_dm_sparse                128  9.5    0.001    0.001    8.394    8.477
 dbcsr_mm_accdrv_process         124484 16.2    3.364    3.527    7.961    8.457
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    8.328    8.440
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    7.805    7.921
 multiply_cannon_metrocomm1       60168 15.6    0.087    0.091    6.178    7.911
 make_images_data                  5014 15.6    0.070    0.076    6.816    7.823
 grid_integrate_task_list           128 12.3    7.090    7.607    7.090    7.607
 pw_transfer                       1547 11.6    0.075    0.107    6.985    7.269
 density_rs2pw                      128  9.7    0.006    0.007    6.670    7.259
 hybrid_alltoall_any               5200 16.5    0.290    2.266    5.940    7.129
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.013    6.780    7.042
 rs_pw_transfer                    1046 11.9    0.017    0.019    6.057    6.776
 cp_dbcsr_sm_fm_multiply             37  9.5    0.003    0.004    6.677    6.686
 mp_alltoall_d11v                  2415 14.1    4.321    5.912    4.321    5.912
 fft_wrap_pw1pw2_140                523 13.2    0.441    0.509    5.704    5.903
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.774    5.838
 fft3d_ps                          1291 14.7    2.092    2.567    5.577    5.772
 grid_collocate_task_list           128  9.7    4.727    5.093    4.727    5.093
 cp_fm_cholesky_decompose            22 10.9    4.704    4.718    4.704    4.718
 potential_pw2rs                    128 12.3    0.010    0.011    4.564    4.606
 mp_sum_d                          4459 12.1    3.766    4.449    3.766    4.449
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=207.639000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=597.181818, yerr=8.451123
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430458527744       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1958505086976       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986244964352       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992000282624       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753956716544       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613083000832       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239146475520       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239146475520       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911124992000       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.228655E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.199914E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806314816       0.0%      0.0%    100.0%
 number of processed stacks               6022464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.2
 marketing flops                   145.647559E+12
 -------------------------------------------------------------------------------
 # multiplications                           2527
 max memory usage/rank             830.693376E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2425920
 MPI messages size (bytes):
  total size                         4.132350E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.703416E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               71436               2336489472
     32768 < size <=   131072              728832              55956209664
    131072 < size <=  4194304             1386864            1409906900992
   4194304 < size <= 16777216              155760            1473826487232
  16777216 < size                           68112            1190343475200
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4113                  56823.
 MP_Allreduce        11322                    944.
 MP_Sync               170
 MP_Alltoall          1983                5090107.
 MP_ISendRecv        24252                  47072.
 MP_Wait             38240
 MP_comm_split          83
 MP_ISend            11836                 212447.
 MP_IRecv            11836                 212447.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.066  191.190  191.198
 qs_mol_dyn_low                       1  2.0    0.013    0.052  190.577  190.589
 qs_forces                           11  3.9    0.003    0.005  189.707  189.715
 qs_energies                         11  4.9    0.001    0.002  183.002  183.020
 scf_env_do_scf                      11  5.9    0.001    0.002  166.515  166.525
 scf_env_do_scf_inner_loop          118  6.6    0.005    0.024  133.618  133.621
 velocity_verlet                     10  3.0    0.002    0.002  120.616  120.633
 dbcsr_multiply_generic            2527 12.6    0.186    0.191   98.015   99.134
 qs_scf_new_mos                     118  7.6    0.001    0.001   94.749   95.195
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   94.748   95.194
 ot_scf_mini                        118  9.6    0.004    0.004   89.915   90.480
 multiply_cannon                   2527 13.6    0.478    0.530   77.873   82.010
 multiply_cannon_loop              2527 14.6    1.270    1.308   74.565   77.223
 ot_mini                            118 10.6    0.001    0.001   50.075   50.579
 mp_waitall_1                    228564 16.4   25.125   39.690   25.125   39.690
 multiply_cannon_multrec          30324 15.6   22.065   26.684   31.762   36.828
 rebuild_ks_matrix                  129  8.3    0.001    0.001   32.920   33.473
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.019   32.920   33.473
 init_scf_loop                       11  6.9    0.000    0.000   32.806   32.808
 qs_ks_update_qs_env                129  7.6    0.001    0.001   29.692   30.195
 multiply_cannon_metrocomm3       30324 15.6    0.096    0.102   15.888   28.772
 qs_ot_get_derivative               118 11.6    0.001    0.002   28.019   28.574
 prepare_preconditioner              11  7.9    0.000    0.000   28.431   28.492
 make_preconditioner                 11  8.9    0.000    0.000   28.431   28.491
 make_full_inverse_cholesky          11  9.9    0.000    0.000   27.146   27.690
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   22.118   23.277
 apply_single                       129 13.6    0.001    0.001   22.118   23.276
 multiply_cannon_sync_h2d         30324 15.6   19.302   22.238   19.302   22.238
 qs_ot_get_p                        129 10.4    0.001    0.001   21.521   22.116
 ot_diis_step                       118 11.6    0.014    0.015   21.879   21.882
 qs_ot_p2m_diag                      83 11.4    0.188    0.218   16.651   16.686
 cp_fm_cholesky_invert               11 10.9   16.635   16.648   16.635   16.648
 make_m2s                          5054 13.6    0.089    0.095   14.313   15.960
 make_images                       5054 14.6    1.159    1.350   14.106   15.751
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   15.484   15.502
 sum_up_and_integrate               129 10.3    0.117    0.133   14.452   14.480
 integrate_v_rspace                 129 11.3    0.003    0.004   14.334   14.367
 qs_rho_update_rho_low              129  7.7    0.001    0.001   13.029   13.068
 calculate_rho_elec                 129  8.7    0.088    0.106   13.028   13.068
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   12.371   12.383
 cp_fm_redistribute_end              83 14.4    7.252   12.315    7.266   12.318
 cp_fm_diag_elpa_base                83 14.4    4.816   11.840    5.031   12.199
 init_scf_run                        11  5.9    0.000    0.001   11.647   11.648
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   11.646   11.648
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   10.949   11.364
 multiply_cannon_metrocomm4       27797 15.6    0.097    0.111    3.829   10.948
 make_images_data                  5054 15.6    0.068    0.074    8.685   10.619
 mp_irecv_dv                      70031 16.3    3.634   10.562    3.634   10.562
 hybrid_alltoall_any               5240 16.5    0.343    1.500    7.379    9.879
 dbcsr_mm_accdrv_process          62734 16.2    4.619    5.490    9.153    9.707
 wfi_extrapolate                     11  7.9    0.001    0.001    8.338    8.338
 pw_transfer                       1559 11.6    0.085    0.101    7.792    7.861
 grid_integrate_task_list           129 12.3    7.234    7.647    7.234    7.647
 fft_wrap_pw1pw2                   1301 12.7    0.010    0.011    7.566    7.626
 density_rs2pw                      129  9.7    0.006    0.006    6.765    7.202
 qs_ot_get_derivative_taylor         41 13.0    0.001    0.001    6.338    7.105
 cp_fm_cholesky_decompose            22 10.9    6.919    7.006    6.919    7.006
 calculate_dm_sparse                129  9.5    0.001    0.001    6.579    6.738
 fft_wrap_pw1pw2_140                527 13.2    0.474    0.523    6.601    6.663
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.004    6.177    6.185
 rs_pw_transfer                    1054 12.0    0.015    0.017    5.637    6.149
 fft3d_ps                          1301 14.7    2.792    2.948    5.926    5.957
 mp_sum_l                          7930 13.1    4.102    5.895    4.102    5.895
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.323    5.450
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    5.349    5.412
 grid_collocate_task_list           129  9.7    4.948    5.332    4.948    5.332
 mp_allgather_i34                  2527 14.6    1.875    5.030    1.875    5.030
 potential_pw2rs                    129 12.3    0.015    0.018    4.813    4.828
 mp_alltoall_d11v                  2423 14.1    4.138    4.664    4.138    4.664
 mp_sum_d                          4496 12.2    2.668    4.011    2.668    4.011
 dbcsr_complete_redistribute        395 12.7    0.766    0.838    3.119    3.975
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=191.198000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=789.818182, yerr=6.671761
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.928533E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               3984192       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1695.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             938.672128E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1042912
 MPI messages size (bytes):
  total size                         2.716210E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.604448E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              281856              36943429632
    131072 < size <=  4194304              660064             996105256960
   4194304 < size <= 16777216               65632             931530938576
  16777216 < size                           28672             751619276800
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4075                  57335.
 MP_Allreduce        11226                    986.
 MP_Sync               170
 MP_Alltoall          1712                9388896.
 MP_ISendRecv        15872                  75008.
 MP_Wait             29756
 MP_comm_split          83
 MP_ISend            11748                 275205.
 MP_IRecv            11748                 275205.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.047    0.089  175.366  175.367
 qs_mol_dyn_low                       1  2.0    0.003    0.003  174.514  174.527
 qs_forces                           11  3.9    0.003    0.004  174.413  174.419
 qs_energies                         11  4.9    0.002    0.011  167.900  167.912
 scf_env_do_scf                      11  5.9    0.001    0.002  152.470  152.470
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  117.701  117.702
 velocity_verlet                     10  3.0    0.001    0.002  111.973  111.975
 dbcsr_multiply_generic            2507 12.6    0.181    0.185   81.542   82.710
 qs_scf_new_mos                     117  7.6    0.001    0.001   80.790   81.082
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   80.789   81.081
 ot_scf_mini                        117  9.6    0.003    0.004   76.655   77.006
 multiply_cannon                   2507 13.6    0.503    0.522   61.870   66.906
 multiply_cannon_loop              2507 14.6    0.864    0.896   58.594   61.444
 ot_mini                            117 10.6    0.001    0.001   42.445   42.779
 init_scf_loop                       11  6.9    0.000    0.000   34.669   34.671
 mp_waitall_1                    178456 16.5   25.082   34.197   25.082   34.197
 prepare_preconditioner              11  7.9    0.000    0.000   30.679   30.728
 make_preconditioner                 11  8.9    0.000    0.000   30.679   30.728
 rebuild_ks_matrix                  128  8.3    0.001    0.001   30.049   30.491
 qs_ks_build_kohn_sham_matrix       128  9.3    0.016    0.018   30.049   30.490
 make_full_inverse_cholesky          11  9.9    0.000    0.000   28.265   29.710
 qs_ks_update_qs_env                128  7.6    0.001    0.001   27.131   27.531
 multiply_cannon_multrec          20056 15.6   13.549   16.590   22.211   25.340
 multiply_cannon_metrocomm3       20056 15.6    0.058    0.061   15.060   24.437
 qs_ot_get_derivative               117 11.6    0.001    0.002   22.804   23.154
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   19.800   20.769
 apply_single                       128 13.6    0.001    0.001   19.800   20.768
 qs_ot_get_p                        128 10.4    0.001    0.001   19.570   20.004
 ot_diis_step                       117 11.6    0.017    0.018   19.543   19.543
 make_m2s                          5014 13.6    0.080    0.086   14.624   15.865
 multiply_cannon_sync_h2d         20056 15.6   14.301   15.752   14.301   15.752
 make_images                       5014 14.6    1.165    1.265   14.393   15.634
 qs_ot_p2m_diag                      83 11.4    0.265    0.272   15.291   15.299
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   14.298   14.300
 cp_fm_cholesky_invert               11 10.9   14.274   14.283   14.274   14.283
 sum_up_and_integrate               128 10.3    0.134    0.145   14.081   14.104
 integrate_v_rspace                 128 11.3    0.004    0.013   13.946   13.974
 qs_rho_update_rho_low              128  7.7    0.001    0.001   12.953   12.975
 calculate_rho_elec                 128  8.7    0.132    0.146   12.952   12.974
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   11.312   11.313
 cp_fm_redistribute_end              83 14.4    4.266   11.257    4.280   11.259
 cp_fm_diag_elpa_base                83 14.4    6.554   10.675    6.960   11.159
 make_images_data                  5014 15.6    0.061    0.069    9.004   10.714
 init_scf_run                        11  5.9    0.000    0.001   10.308   10.309
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   10.308   10.309
 hybrid_alltoall_any               5200 16.5    0.434    1.980    7.912    9.928
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    8.940    9.207
 multiply_cannon_metrocomm4       17549 15.6    0.063    0.071    3.417    9.086
 mp_irecv_dv                      50230 16.2    3.293    8.841    3.293    8.841
 dbcsr_mm_accdrv_process          41502 16.2    4.463    5.038    8.108    8.245
 pw_transfer                       1547 11.6    0.084    0.103    7.731    7.844
 grid_integrate_task_list           128 12.3    7.296    7.659    7.296    7.659
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    7.509    7.625
 wfi_extrapolate                     11  7.9    0.001    0.001    7.356    7.356
 cp_fm_upper_to_full                105 14.5    5.682    7.330    5.682    7.330
 cp_fm_cholesky_decompose            22 10.9    7.268    7.304    7.268    7.304
 density_rs2pw                      128  9.7    0.006    0.006    6.397    6.737
 fft_wrap_pw1pw2_140                523 13.2    0.479    0.526    6.613    6.730
 dbcsr_complete_redistribute        395 12.7    1.166    1.197    4.492    6.215
 fft3d_ps                          1291 14.7    2.696    2.900    5.775    5.849
 calculate_dm_sparse                128  9.5    0.001    0.001    5.726    5.826
 grid_collocate_task_list           128  9.7    5.071    5.588    5.071    5.588
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.411    5.415
 rs_pw_transfer                    1046 11.9    0.013    0.014    4.965    5.380
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.550    5.279
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.325    5.051
 mp_alltoall_d11v                  2415 14.1    4.175    4.698    4.175    4.698
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.542    4.656
 mp_allgather_i34                  2507 14.6    1.798    4.646    1.798    4.646
 mp_sum_l                          7870 13.0    3.233    4.594    3.233    4.594
 potential_pw2rs                    128 12.3    0.020    0.022    4.427    4.445
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    2.394    4.086
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    3.973    3.997
 mp_alltoall_i22                    716 14.1    2.008    3.895    2.008    3.895
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    3.804    3.805
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=175.367000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=887.181818, yerr=10.760350
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022950912       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963542011904       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444706349056       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019182452736       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019182452736       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796564E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.320337E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499488       0.0%      0.0%    100.0%
 number of processed stacks               5927808       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1131.2
 marketing flops                   143.508480E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               1.153466E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1133160
 MPI messages size (bytes):
  total size                         2.008142E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.772161E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              315952              35695099904
    131072 < size <=  4194304              709496             778939400192
   4194304 < size <= 16777216               69840             660837542000
  16777216 < size                           30480             532676608000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4077                  57301.
 MP_Allreduce        11231                   1068.
 MP_Sync               168
 MP_Alltoall          1700               12496381.
 MP_ISendRecv        11684                  75008.
 MP_Wait             28114
 MP_comm_split          82
 MP_ISend            14840                 244848.
 MP_IRecv            14840                 244848.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.045    0.119  187.643  187.644
 qs_mol_dyn_low                       1  2.0    0.003    0.003  186.837  186.850
 qs_forces                           11  3.9    0.003    0.004  186.728  186.735
 qs_energies                         11  4.9    0.004    0.007  179.694  179.708
 scf_env_do_scf                      11  5.9    0.001    0.002  162.983  162.994
 velocity_verlet                     10  3.0    0.001    0.002  123.586  123.588
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  116.045  116.046
 qs_scf_new_mos                     116  7.6    0.001    0.001   80.882   81.192
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   80.882   81.191
 dbcsr_multiply_generic            2485 12.5    0.188    0.193   78.830   79.388
 ot_scf_mini                        116  9.6    0.003    0.004   76.407   76.719
 multiply_cannon                   2485 13.5    0.551    0.589   54.223   56.477
 multiply_cannon_loop              2485 14.5    1.171    1.201   50.524   51.771
 init_scf_loop                       11  6.9    0.000    0.001   46.812   46.813
 prepare_preconditioner              11  7.9    0.000    0.000   42.703   42.734
 make_preconditioner                 11  8.9    0.000    0.001   42.703   42.734
 ot_mini                            116 10.6    0.001    0.001   42.089   42.415
 make_full_inverse_cholesky          11  9.9    0.000    0.000   36.284   41.345
 multiply_cannon_multrec          29820 15.5   14.116   18.868   25.962   30.474
 rebuild_ks_matrix                  127  8.3    0.001    0.001   28.818   29.041
 qs_ks_build_kohn_sham_matrix       127  9.3    0.016    0.018   28.817   29.040
 mp_waitall_1                    152434 16.5   17.327   27.261   17.327   27.261
 qs_ks_update_qs_env                127  7.6    0.001    0.001   26.024   26.223
 qs_ot_get_derivative               116 11.6    0.001    0.002   22.612   22.926
 make_m2s                          4970 13.5    0.094    0.099   20.203   21.200
 qs_ot_get_p                        127 10.4    0.001    0.001   20.597   20.910
 make_images                       4970 14.5    1.954    2.297   19.899   20.896
 apply_preconditioner_dbcsr         127 12.6    0.000    0.001   18.951   19.338
 apply_single                       127 13.6    0.001    0.001   18.951   19.338
 ot_diis_step                       116 11.6    0.017    0.018   19.333   19.335
 cp_fm_upper_to_full                104 14.7   11.183   16.564   11.183   16.564
 qs_ot_p2m_diag                      82 11.4    0.338    0.385   16.497   16.550
 cp_fm_cholesky_invert               11 10.9   16.346   16.356   16.346   16.356
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   15.216   15.217
 multiply_cannon_metrocomm3       29820 15.5    0.046    0.049    6.224   15.217
 sum_up_and_integrate               127 10.3    0.140    0.154   13.998   14.023
 integrate_v_rspace                 127 11.3    0.003    0.004   13.858   13.890
 qs_rho_update_rho_low              127  7.7    0.001    0.001   13.016   13.071
 calculate_rho_elec                 127  8.7    0.174    0.189   13.015   13.070
 dbcsr_complete_redistribute        393 12.7    1.514    1.625    9.072   12.876
 multiply_cannon_sync_h2d         29820 15.5   11.705   12.875   11.705   12.875
 make_images_data                  4970 15.5    0.064    0.068   10.868   12.565
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   12.093   12.096
 cp_fm_redistribute_end              82 14.4    2.077   12.002    2.095   12.008
 dbcsr_mm_accdrv_process          61748 16.2    7.306    8.309   11.427   11.893
 cp_fm_diag_elpa_base                82 14.4    9.296   11.388    9.878   11.865
 copy_fm_to_dbcsr                   208 11.6    0.001    0.002    7.675   11.476
 hybrid_alltoall_any               5155 16.4    0.521    2.176    9.804   11.398
 init_scf_run                        11  5.9    0.000    0.001   10.722   10.723
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.721   10.723
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    6.397   10.097
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    9.332    9.559
 mp_alltoall_i22                    712 14.1    5.627    9.405    5.627    9.405
 grid_integrate_task_list           127 12.3    7.453    7.890    7.453    7.890
 pw_transfer                       1535 11.6    0.084    0.098    7.776    7.853
 cp_fm_cholesky_decompose            22 10.9    7.697    7.791    7.697    7.791
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    7.552    7.636
 wfi_extrapolate                     11  7.9    0.001    0.001    7.587    7.587
 multiply_cannon_metrocomm4       24850 15.5    0.074    0.084    2.682    7.357
 mp_irecv_dv                      75445 16.2    2.540    7.091    2.540    7.091
 fft_wrap_pw1pw2_140                519 13.2    0.477    0.485    6.653    6.754
 density_rs2pw                      127  9.7    0.006    0.006    6.209    6.611
 calculate_dm_sparse                127  9.5    0.001    0.001    6.178    6.258
 fft3d_ps                          1281 14.7    2.775    2.848    5.792    5.846
 grid_collocate_task_list           127  9.7    5.165    5.715    5.165    5.715
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.419    5.496
 mp_alltoall_d11v                  2401 14.1    4.780    5.480    4.780    5.480
 rs_pw_transfer                    1038 11.9    0.013    0.015    4.627    4.971
 qs_energies_init_hamiltonians       11  5.9    0.015    0.029    4.491    4.492
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.366    4.455
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.336    4.404
 potential_pw2rs                    127 12.3    0.022    0.023    4.282    4.305
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    4.194    4.259
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=187.644000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1081.727273, yerr=24.140087
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.865088E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               1960712       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3445.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               1.523577E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  240672
 MPI messages size (bytes):
  total size                         1.331455E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.532237E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              113904              59718500352
   4194304 < size <= 16777216              104976             550376570880
  16777216 < size                           20208             721350092304
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8931                     51.
 MP_Alltoall          9654                 799394.
 MP_ISend            40068                2102572.
 MP_IRecv            40068                2101675.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  58352.
 MP_Allreduce        10977                   1175.
 MP_Sync                87
 MP_Alltoall          1712               18838210.
 MP_ISendRecv         7680                 122880.
 MP_Wait             19962
 MP_ISend            10680                 423556.
 MP_IRecv            10680                 423556.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.056    0.067  173.705  173.706
 qs_mol_dyn_low                       1  2.0    0.003    0.003  173.149  173.161
 qs_forces                           11  3.9    0.003    0.004  173.041  173.044
 qs_energies                         11  4.9    0.001    0.002  165.734  165.742
 scf_env_do_scf                      11  5.9    0.001    0.001  148.681  148.694
 velocity_verlet                     10  3.0    0.015    0.019  113.030  113.034
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  112.666  112.668
 qs_scf_new_mos                     117  7.6    0.001    0.001   76.945   77.013
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   76.944   77.012
 dbcsr_multiply_generic            2507 12.6    0.182    0.188   74.062   74.430
 ot_scf_mini                        117  9.6    0.004    0.004   72.542   72.578
 multiply_cannon                   2507 13.6    0.583    0.615   54.326   58.313
 multiply_cannon_loop              2507 14.6    0.452    0.486   49.723   50.442
 ot_mini                            117 10.6    0.001    0.001   39.687   39.725
 init_scf_loop                       11  6.9    0.000    0.000   35.856   35.858
 mp_waitall_1                    129618 16.6   26.152   33.525   26.152   33.525
 prepare_preconditioner              11  7.9    0.000    0.000   31.993   32.021
 make_preconditioner                 11  8.9    0.000    0.000   31.993   32.021
 make_full_inverse_cholesky          11  9.9    0.000    0.000   29.974   30.254
 rebuild_ks_matrix                  128  8.3    0.001    0.001   28.606   28.651
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.017   28.606   28.650
 qs_ks_update_qs_env                128  7.6    0.001    0.001   26.132   26.168
 multiply_cannon_multrec          10028 15.6   10.441   14.507   17.718   21.063
 qs_ot_get_p                        128 10.4    0.001    0.001   19.966   20.028
 qs_ot_get_derivative               117 11.6    0.002    0.002   19.797   19.846
 ot_diis_step                       117 11.6    0.019    0.020   19.822   19.823
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   19.431   19.613
 apply_single                       128 13.6    0.001    0.001   19.431   19.612
 multiply_cannon_metrocomm3       10028 15.6    0.022    0.024   12.347   19.054
 cp_fm_cholesky_invert               11 10.9   18.386   18.392   18.386   18.392
 make_m2s                          5014 13.6    0.065    0.070   16.086   18.342
 make_images                       5014 14.6    2.316    2.829   15.783   18.039
 qs_ot_p2m_diag                      83 11.4    0.495    0.502   16.119   16.139
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   14.943   14.945
 sum_up_and_integrate               128 10.3    0.180    0.191   14.153   14.196
 integrate_v_rspace                 128 11.3    0.004    0.004   13.972   14.026
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.243   13.286
 calculate_rho_elec                 128  8.7    0.258    0.269   13.242   13.285
 make_images_data                  5014 15.6    0.053    0.061    9.710   12.196
 multiply_cannon_sync_h2d         10028 15.6   11.566   12.153   11.566   12.153
 hybrid_alltoall_any               5200 16.5    0.837    3.775    9.502   12.064
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   11.741   11.746
 cp_fm_diag_elpa_base                83 14.4   11.494   11.568   11.734   11.739
 init_scf_run                        11  5.9    0.000    0.001   10.256   10.256
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.255   10.256
 grid_integrate_task_list           128 12.3    7.701    8.192    7.701    8.192
 cp_fm_cholesky_decompose            22 10.9    8.061    8.181    8.061    8.181
 qs_ot_get_derivative_diag           77 12.4    0.002    0.003    8.025    8.058
 pw_transfer                       1547 11.6    0.084    0.092    7.831    7.852
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.010    7.610    7.640
 dbcsr_mm_accdrv_process          20762 16.1    2.584    3.428    6.911    7.533
 wfi_extrapolate                     11  7.9    0.001    0.001    7.336    7.336
 multiply_cannon_metrocomm1       10028 15.6    0.029    0.030    4.461    7.300
 mp_allgather_i34                  2507 14.6    2.809    6.866    2.809    6.866
 fft_wrap_pw1pw2_140                523 13.2    0.502    0.521    6.644    6.684
 density_rs2pw                      128  9.7    0.005    0.006    6.058    6.264
 calculate_dm_sparse                128  9.5    0.001    0.001    6.098    6.188
 fft3d_ps                          1291 14.7    2.723    2.807    5.771    5.797
 grid_collocate_task_list           128  9.7    5.495    5.728    5.495    5.728
 dbcsr_complete_redistribute        395 12.7    2.108    2.168    5.201    5.569
 mp_alltoall_d11v                  2415 14.1    4.770    5.567    4.770    5.567
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    5.234    5.235
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.146    5.157
 rs_pw_transfer                    1046 11.9    0.013    0.013    4.258    4.498
 multiply_cannon_metrocomm4        7521 15.6    0.024    0.026    1.856    4.458
 mp_irecv_dv                      28860 15.9    1.820    4.385    1.820    4.385
 potential_pw2rs                    128 12.3    0.026    0.028    4.178    4.195
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.046    4.080
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.559    3.869
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    3.718    3.745
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    3.601    3.671
 copy_fm_to_dbcsr                   209 11.7    0.001    0.002    3.347    3.661
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.466    3.479
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=173.706000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1424.636364, yerr=50.492615
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410023282688       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444707676160       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796579E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.606413E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705500928       0.0%      0.0%    100.0%
 number of processed stacks               1947808       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3442.6
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               2.932748E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   99400
 MPI messages size (bytes):
  total size                         1.127422E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.342275E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               44768              34745614336
   4194304 < size <= 16777216               43984             376564613120
  16777216 < size                           10032             716108638608
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4003                  59127.
 MP_Allreduce        11005                   1515.
 MP_Sync                86
 MP_Alltoall          1700               36954383.
 MP_ISendRecv         3556                 218624.
 MP_Wait             11506
 MP_ISend             6360                1080477.
 MP_IRecv             6360                1080477.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.051    0.076  290.185  290.185
 qs_mol_dyn_low                       1  2.0    0.003    0.004  289.432  289.444
 qs_forces                           11  3.9    0.066    0.067  289.339  289.342
 qs_energies                         11  4.9    0.002    0.002  280.722  280.736
 scf_env_do_scf                      11  5.9    0.001    0.002  258.952  258.963
 velocity_verlet                     10  3.0    0.002    0.002  209.438  209.445
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  132.215  132.216
 init_scf_loop                       11  6.9    0.000    0.000  126.472  126.474
 prepare_preconditioner              11  7.9    0.000    0.000  121.707  121.741
 make_preconditioner                 11  8.9    0.000    0.000  121.707  121.741
 make_full_inverse_cholesky          11  9.9    0.000    0.000   96.765  118.863
 qs_scf_new_mos                     116  7.6    0.001    0.001   90.454   90.554
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   90.453   90.553
 ot_scf_mini                        116  9.6    0.004    0.004   85.710   85.747
 dbcsr_multiply_generic            2485 12.5    0.212    0.219   82.183   82.749
 cp_fm_upper_to_full                104 14.8   54.208   78.189   54.208   78.189
 multiply_cannon                   2485 13.5    0.711    0.796   58.002   58.734
 multiply_cannon_loop              2485 14.5    0.468    0.473   54.390   55.779
 dbcsr_complete_redistribute        393 12.7    4.006    4.068   30.710   44.205
 ot_mini                            116 10.6    0.001    0.001   43.693   43.730
 copy_fm_to_dbcsr                   208 11.6    0.001    0.002   27.264   40.737
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000   24.897   38.271
 mp_alltoall_i22                    712 14.1   22.701   36.419   22.701   36.419
 cp_fm_cholesky_invert               11 10.9   33.437   33.444   33.437   33.444
 rebuild_ks_matrix                  127  8.3    0.001    0.001   33.340   33.384
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.018   33.339   33.384
 mp_waitall_1                    104546 16.7   27.852   31.729   27.852   31.729
 qs_ks_update_qs_env                127  7.6    0.001    0.001   31.104   31.149
 qs_ot_get_p                        127 10.4    0.001    0.001   26.780   26.798
 qs_ot_get_derivative               116 11.6    0.002    0.002   23.988   24.028
 qs_ot_p2m_diag                      82 11.4    0.868    0.873   22.625   22.651
 cp_dbcsr_syevd                      82 12.4    0.005    0.006   20.912   20.913
 make_m2s                          4970 13.5    0.075    0.077   19.712   20.666
 make_images                       4970 14.5    3.737    3.826   19.240   20.195
 ot_diis_step                       116 11.6    0.022    0.023   19.679   19.679
 multiply_cannon_metrocomm3        9940 15.5    0.023    0.023   18.433   19.547
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   18.904   19.097
 apply_single                       127 13.6    0.001    0.001   18.904   19.097
 multiply_cannon_multrec           9940 15.5   10.431   12.171   17.871   17.976
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   17.712   17.714
 cp_fm_diag_elpa_base                82 14.4   13.256   14.933   17.707   17.709
 sum_up_and_integrate               127 10.3    0.318    0.321   15.657   15.756
 multiply_cannon_sync_h2d          9940 15.5   15.533   15.549   15.533   15.549
 integrate_v_rspace                 127 11.3    0.004    0.004   15.338   15.437
 qs_rho_update_rho_low              127  7.7    0.001    0.001   14.929   14.942
 calculate_rho_elec                 127  8.7    0.479    0.480   14.928   14.942
 hybrid_alltoall_any               5155 16.4    1.293    3.019   10.626   12.447
 make_images_data                  4970 15.5    0.060    0.065   10.478   12.167
 init_scf_run                        11  5.9    0.000    0.001   11.836   11.836
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   11.835   11.836
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    9.388    9.421
 dbcsr_mm_accdrv_process          20590 16.0    3.800    5.659    7.204    8.992
 cp_fm_cholesky_decompose            22 10.9    8.924    8.951    8.924    8.951
 wfi_extrapolate                     11  7.9    0.001    0.001    8.781    8.782
 grid_integrate_task_list           127 12.3    8.495    8.665    8.495    8.665
 pw_transfer                       1535 11.6    0.090    0.091    8.364    8.370
 fft_wrap_pw1pw2                   1281 12.7    0.011    0.011    8.132    8.137
 qs_energies_init_hamiltonians       11  5.9    0.002    0.004    7.918    7.919
 fft_wrap_pw1pw2_140                519 13.2    0.535    0.538    7.184    7.193
 mp_alltoall_d11v                  2401 14.1    6.984    7.120    6.984    7.120
 calculate_dm_sparse                127  9.5    0.001    0.001    6.589    6.674
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.420    6.496
 grid_collocate_task_list           127  9.7    6.287    6.321    6.287    6.321
 fft3d_ps                          1281 14.7    2.734    2.742    6.191    6.200
 copy_dbcsr_to_fm                   185 11.7    0.004    0.004    6.117    6.200
 density_rs2pw                      127  9.7    0.005    0.005    6.030    6.061
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=290.185000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2593.818182, yerr=173.547907
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.261318E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255669.
 MP_Allreduce         3059                   6274.
 MP_Sync                 4
 MP_Alltoall            54
 MP_ISendRecv          570                  19200.
 MP_Wait              1302
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.056    0.100   85.061   85.075
 qs_energies                          1  2.0    0.000    0.000   84.289   84.299
 ls_scf                               1  3.0    0.000    0.000   83.335   83.346
 dbcsr_multiply_generic             111  6.7    0.014    0.015   72.459   72.604
 multiply_cannon                    111  7.7    0.017    0.020   55.738   57.112
 multiply_cannon_loop               111  8.7    0.212    0.225   52.289   53.855
 ls_scf_main                          1  4.0    0.000    0.000   52.154   52.154
 density_matrix_trs4                  2  5.0    0.002    0.003   46.703   46.777
 ls_scf_init_scf                      1  4.0    0.000    0.000   28.139   28.140
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   27.045   27.107
 mp_waitall_1                     11316 10.9   22.551   25.396   22.551   25.396
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   24.967   24.988
 multiply_cannon_multrec           2664  9.7    8.113    9.050   15.390   17.260
 multiply_cannon_sync_h2d          2664  9.7   13.523   15.752   13.523   15.752
 make_m2s                           222  7.7    0.009    0.011   13.043   13.598
 make_images                        222  8.7    0.099    0.108   13.020   13.577
 multiply_cannon_metrocomm1        2664  9.7    0.009    0.010    9.716   12.519
 make_images_data                   222  9.7    0.004    0.005    7.636    8.119
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.516    8.080
 hybrid_alltoall_any                227 10.6    0.215    1.835    6.540    7.937
 dbcsr_mm_accdrv_process           4760 10.4    0.510    0.619    6.897    7.827
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.188    7.057    6.188    7.057
 calculate_norms                   4752  9.8    5.497    6.058    5.497    6.058
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.042    5.179
 mp_sum_l                           807  5.4    3.172    4.677    3.172    4.677
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.361    3.569
 multiply_cannon_metrocomm4        2442  9.7    0.011    0.014    2.051    3.279
 mp_irecv_dv                       6231 10.9    2.034    3.256    2.034    3.256
 arnoldi_extremal                     4  6.8    0.000    0.000    3.209    3.236
 arnoldi_normal_ev                    4  7.8    0.001    0.003    3.209    3.236
 make_images_sizes                  222  9.7    0.000    0.000    0.647    3.206
 mp_alltoall_i44                    222 10.7    0.647    3.206    0.647    3.206
 build_subspace                      16  8.4    0.009    0.012    3.115    3.117
 ls_scf_post                          1  4.0    0.000    0.000    3.042    3.053
 ls_scf_store_result                  1  5.0    0.000    0.000    2.843    2.890
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.406    2.822
 dbcsr_merge_single_wm              555 10.7    0.458    0.591    2.398    2.813
 make_images_pack                   222  9.7    2.207    2.626    2.209    2.628
 dbcsr_matrix_vector_mult           304  9.0    0.003    0.010    2.309    2.564
 dbcsr_sort_data                    658 11.4    2.198    2.535    2.198    2.535
 dbcsr_matrix_vector_mult_local     304 10.0    2.063    2.467    2.065    2.469
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.188    2.259
 buffer_matrices_ensure_size        222  8.7    1.748    2.072    1.748    2.072
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.743    1.744
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.734    1.735
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    1.734    1.735
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=85.075000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1142.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.089828E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  10339.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_ISendRecv          282                  57600.
 MP_Wait               828
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.073    0.126   91.868   91.894
 qs_energies                          1  2.0    0.000    0.000   90.864   90.876
 ls_scf                               1  3.0    0.000    0.001   89.440   89.451
 dbcsr_multiply_generic             111  6.7    0.015    0.016   75.412   75.774
 multiply_cannon                    111  7.7    0.028    0.040   53.488   57.108
 ls_scf_main                          1  4.0    0.000    0.000   55.085   55.096
 multiply_cannon_loop               111  8.7    0.116    0.123   50.147   53.245
 density_matrix_trs4                  2  5.0    0.002    0.003   49.404   49.595
 ls_scf_init_scf                      1  4.0    0.000    0.002   30.778   30.779
 mp_waitall_1                      9246 10.9   21.391   30.374   21.391   30.374
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   29.546   29.658
 multiply_cannon_multrec           1332  9.7   13.163   17.309   22.355   27.590
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.127   27.148
 multiply_cannon_metrocomm3        1332  9.7    0.006    0.007   11.974   21.023
 make_m2s                           222  7.7    0.006    0.008   15.276   15.979
 make_images                        222  8.7    1.571    1.918   15.246   15.951
 dbcsr_mm_accdrv_process           4041 10.4    0.295    0.454    8.793   10.376
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.384    9.922    8.384    9.922
 make_images_data                   222  9.7    0.004    0.004    8.813    9.752
 hybrid_alltoall_any                227 10.6    0.522    2.425    8.207    9.132
 mp_sum_l                           807  5.4    5.402    8.284    5.402    8.284
 multiply_cannon_metrocomm4        1221  9.7    0.006    0.008    3.250    7.718
 mp_irecv_dv                       3311 11.0    3.230    7.652    3.230    7.652
 calculate_norms                   2376  9.8    5.985    6.739    5.985    6.739
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.088    6.443
 multiply_cannon_sync_h2d          1332  9.7    4.786    5.812    4.786    5.812
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.127    5.369
 arnoldi_extremal                     4  6.8    0.000    0.000    4.695    4.720
 arnoldi_normal_ev                    4  7.8    0.001    0.004    4.695    4.720
 build_subspace                      16  8.4    0.014    0.021    4.436    4.439
 ls_scf_post                          1  4.0    0.001    0.006    3.577    3.588
 dbcsr_matrix_vector_mult           304  9.0    0.005    0.016    3.183    3.407
 ls_scf_store_result                  1  5.0    0.000    0.000    3.260    3.386
 dbcsr_matrix_vector_mult_local     304 10.0    2.772    3.257    2.774    3.258
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    1.228    2.811
 ls_scf_dm_to_ks                      2  5.0    0.000    0.005    2.520    2.603
 mp_allgather_i34                   111  8.7    1.045    2.533    1.045    2.533
 make_images_pack                   222  9.7    2.019    2.438    2.021    2.440
 dbcsr_sort_data                    436 11.2    1.796    2.021    1.796    2.021
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.887    1.889
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.874    1.876
 qs_ks_build_kohn_sham_matrix         3  8.3    0.004    0.011    1.873    1.876
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=91.894000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1699.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.710307E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265470.
 MP_Allreduce         3058                  11181.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_ISendRecv          186                  57600.
 MP_Wait               732
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.078    0.131   94.678   94.679
 qs_energies                          1  2.0    0.000    0.000   93.704   93.707
 ls_scf                               1  3.0    0.000    0.000   92.316   92.319
 dbcsr_multiply_generic             111  6.7    0.015    0.016   77.049   77.319
 ls_scf_main                          1  4.0    0.000    0.000   57.987   57.992
 multiply_cannon                    111  7.7    0.038    0.080   53.037   57.106
 multiply_cannon_loop               111  8.7    0.100    0.110   49.433   53.383
 density_matrix_trs4                  2  5.0    0.002    0.003   52.000   52.216
 mp_waitall_1                      7374 11.0   24.243   34.042   24.243   34.042
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.745   30.747
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.637   29.695
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.249   27.266
 multiply_cannon_multrec            888  9.7   12.558   15.253   21.070   24.377
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   11.315   23.049
 make_m2s                           222  7.7    0.006    0.006   17.148   18.395
 make_images                        222  8.7    1.967    2.280   17.110   18.358
 make_images_data                   222  9.7    0.003    0.004    9.802   10.833
 hybrid_alltoall_any                227 10.6    0.618    2.829    9.458   10.779
 dbcsr_mm_accdrv_process           3754 10.4    0.241    0.415    8.031    9.241
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.660    8.826    7.660    8.826
 mp_sum_l                           807  5.4    5.394    8.699    5.394    8.699
 multiply_cannon_sync_h2d           888  9.7    5.995    7.692    5.995    7.692
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.462    7.055
 mp_irecv_dv                       2335 11.1    2.446    7.010    2.446    7.010
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.058    6.817
 multiply_cannon_metrocomm1         888  9.7    0.002    0.003    3.737    6.763
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.063    5.257
 arnoldi_extremal                     4  6.8    0.000    0.000    5.221    5.237
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.220    5.237
 build_subspace                      16  8.4    0.014    0.020    4.917    4.924
 calculate_norms                   1584  9.8    4.240    4.571    4.240    4.571
 mp_allgather_i34                   111  8.7    1.436    3.960    1.436    3.960
 dbcsr_matrix_vector_mult           304  9.0    0.005    0.016    3.493    3.811
 dbcsr_matrix_vector_mult_local     304 10.0    3.041    3.599    3.043    3.601
 ls_scf_post                          1  4.0    0.000    0.000    3.584    3.588
 ls_scf_store_result                  1  5.0    0.000    0.000    3.323    3.408
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.769    2.872
 make_images_sizes                  222  9.7    0.000    0.000    1.033    2.222
 mp_alltoall_i44                    222 10.7    1.033    2.221    1.033    2.221
 dbcsr_sort_data                    325 11.1    1.884    2.121    1.884    2.121
 make_images_pack                   222  9.7    1.812    2.117    1.815    2.119
 dbcsr_data_release                9322 10.9    1.303    1.938    1.303    1.938
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=94.679000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2179.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.340837E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  13371.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_ISendRecv          138                  86400.
 MP_Wait               600
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.113    0.234   97.885   97.888
 qs_energies                          1  2.0    0.000    0.000   96.601   96.615
 ls_scf                               1  3.0    0.000    0.001   94.953   94.967
 dbcsr_multiply_generic             111  6.7    0.017    0.017   78.757   79.024
 ls_scf_main                          1  4.0    0.000    0.000   59.066   59.067
 multiply_cannon                    111  7.7    0.055    0.117   51.600   56.324
 density_matrix_trs4                  2  5.0    0.002    0.003   53.072   53.173
 multiply_cannon_loop               111  8.7    0.114    0.128   46.582   49.881
 ls_scf_init_scf                      1  4.0    0.009    0.023   32.632   32.633
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   31.424   31.513
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   28.924   28.936
 mp_waitall_1                      6438 11.0   23.007   28.606   23.007   28.606
 multiply_cannon_multrec           1332  9.7   14.100   17.153   21.908   24.520
 make_m2s                           222  7.7    0.007    0.008   21.242   22.631
 make_images                        222  8.7    3.130    3.585   21.192   22.583
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.003    9.331   16.964
 make_images_data                   222  9.7    0.004    0.004   11.902   13.565
 hybrid_alltoall_any                227 10.6    0.798    3.771   11.297   13.092
 dbcsr_mm_accdrv_process           3641 10.4    0.196    0.407    7.455    8.971
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.082    8.553    7.082    8.553
 mp_sum_l                           807  5.4    4.397    8.289    4.397    8.289
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.335    6.542
 multiply_cannon_sync_h2d          1332  9.7    5.545    6.261    5.545    6.261
 multiply_cannon_metrocomm4        1110  9.7    0.004    0.006    2.083    5.956
 mp_irecv_dv                       3229 10.9    2.060    5.889    2.060    5.889
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.602    5.435
 arnoldi_extremal                     4  6.8    0.000    0.000    5.299    5.312
 arnoldi_normal_ev                    4  7.8    0.001    0.004    5.299    5.312
 build_subspace                      16  8.4    0.015    0.021    4.955    4.963
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.524    4.739
 calculate_norms                   2376  9.8    4.177    4.554    4.177    4.554
 mp_allgather_i34                   111  8.7    2.136    4.383    2.136    4.383
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.016    3.615    3.915
 dbcsr_matrix_vector_mult_local     304 10.0    3.186    3.690    3.188    3.691
 dbcsr_sort_data                    658 11.4    3.061    3.371    3.061    3.371
 ls_scf_post                          1  4.0    0.002    0.013    3.255    3.269
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.818    3.258
 dbcsr_merge_single_wm              555 10.7    0.536    0.664    2.810    3.250
 ls_scf_store_result                  1  5.0    0.000    0.000    3.002    3.049
 ls_scf_dm_to_ks                      2  5.0    0.000    0.001    2.886    2.943
 dbcsr_data_release               10477 10.7    1.560    2.355    1.560    2.355
 dbcsr_finalize                     304  7.8    0.049    0.061    1.784    2.000
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=97.888000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2726.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.615889E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265558.
 MP_Allreduce         3049                  15663.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_ISendRecv           90                 115200.
 MP_Wait               573
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.092    0.104   99.860   99.861
 qs_energies                          1  2.0    0.000    0.000   98.442   98.446
 ls_scf                               1  3.0    0.000    0.000   96.484   96.488
 dbcsr_multiply_generic             111  6.7    0.017    0.018   78.251   78.455
 ls_scf_main                          1  4.0    0.000    0.000   62.295   62.296
 multiply_cannon                    111  7.7    0.102    0.173   55.679   61.228
 density_matrix_trs4                  2  5.0    0.002    0.003   55.272   55.368
 multiply_cannon_loop               111  8.7    0.069    0.077   51.164   52.931
 mp_waitall_1                      5481 11.0   27.023   32.216   27.023   32.216
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.582   30.585
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.420   29.460
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.253   27.260
 multiply_cannon_multrec            444  9.7   14.097   16.470   21.097   24.442
 make_m2s                           222  7.7    0.004    0.005   17.789   20.287
 make_images                        222  8.7    3.718    4.392   17.727   20.226
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   12.052   16.931
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001    5.660   15.294
 make_images_data                   222  9.7    0.003    0.004   10.107   12.502
 hybrid_alltoall_any                227 10.6    0.788    3.776    9.817   12.249
 multiply_cannon_sync_h2d           444  9.7    6.572    8.805    6.572    8.805
 dbcsr_mm_accdrv_process           3003 10.4    0.166    0.343    6.703    7.842
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.387    7.493    6.387    7.493
 mp_allgather_i34                   111  8.7    2.744    6.969    2.744    6.969
 arnoldi_extremal                     4  6.8    0.000    0.000    5.821    5.829
 arnoldi_normal_ev                    4  7.8    0.002    0.005    5.821    5.829
 build_subspace                      16  8.4    0.015    0.019    5.436    5.444
 mp_sum_l                           807  5.4    2.829    4.859    2.829    4.859
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.597    4.751
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.605    4.564
 mp_irecv_dv                       1241 11.2    1.586    4.541    1.586    4.541
 dbcsr_matrix_vector_mult           304  9.0    0.007    0.016    4.154    4.372
 dbcsr_matrix_vector_mult_local     304 10.0    3.651    4.110    3.653    4.112
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    1.962    3.804
 calculate_norms                    792  9.8    3.525    3.677    3.525    3.677
 ls_scf_post                          1  4.0    0.000    0.000    3.608    3.612
 make_images_sizes                  222  9.7    0.000    0.000    1.047    3.601
 mp_alltoall_i44                    222 10.7    1.047    3.601    1.047    3.601
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.398    3.507
 ls_scf_store_result                  1  5.0    0.000    0.000    3.382    3.439
 dbcsr_finalize                     304  7.8    0.062    0.077    2.199    2.266
 dbcsr_merge_all                    275  8.9    0.473    0.524    2.047    2.100
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=99.861000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3661.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/3afb8bf709f96407a7e9894ade69f93aea585134_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.759861E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284111.
 MP_Allreduce         3043                  21950.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_ISendRecv           84                 732600.
 MP_Wait               309
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.181    0.222  108.363  108.367
 qs_energies                          1  2.0    0.000    0.000  106.445  106.457
 ls_scf                               1  3.0    0.000    0.000  103.512  103.524
 dbcsr_multiply_generic             111  6.7    0.023    0.027   77.515   77.624
 ls_scf_main                          1  4.0    0.000    0.000   65.791   65.792
 density_matrix_trs4                  2  5.0    0.002    0.003   56.890   56.950
 multiply_cannon                    111  7.7    0.150    0.246   49.904   52.109
 multiply_cannon_loop               111  8.7    0.067    0.070   46.379   47.199
 ls_scf_init_scf                      1  4.0    0.000    0.000   34.030   34.031
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   32.692   32.704
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   29.857   29.869
 mp_waitall_1                      4569 11.1   22.259   26.079   22.259   26.079
 make_m2s                           222  7.7    0.005    0.005   24.025   25.091
 make_images                        222  8.7    4.575    4.982   23.919   24.983
 multiply_cannon_multrec            444  9.7   17.934   18.548   22.521   23.075
 hybrid_alltoall_any                227 10.6    1.657    3.613   13.074   15.783
 make_images_data                   222  9.7    0.003    0.003   13.255   15.764
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.402   10.961
 multiply_cannon_sync_h2d           444  9.7    8.846    8.902    8.846    8.902
 arnoldi_extremal                     4  6.8    0.000    0.000    7.419    7.431
 arnoldi_normal_ev                    4  7.8    0.003    0.009    7.419    7.431
 build_subspace                      16  8.4    0.026    0.035    6.841    6.850
 dbcsr_matrix_vector_mult           304  9.0    0.009    0.025    5.415    5.574
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.045    5.290
 dbcsr_matrix_vector_mult_local     304 10.0    4.960    5.243    4.962    5.246
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    4.848    4.929
 dbcsr_mm_accdrv_process           1814 10.4    0.232    0.313    4.418    4.551
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.117    4.253    4.117    4.253
 mp_allgather_i34                   111  8.7    1.180    3.717    1.180    3.717
 ls_scf_post                          1  4.0    0.000    0.000    3.691    3.703
 make_images_sizes                  222  9.7    0.000    0.000    1.435    3.561
 mp_alltoall_i44                    222 10.7    1.435    3.561    1.435    3.561
 ls_scf_store_result                  1  5.0    0.000    0.000    3.419    3.428
 calculate_norms                    792  9.8    3.241    3.283    3.241    3.283
 dbcsr_finalize                     304  7.8    0.082    0.089    3.078    3.134
 dbcsr_merge_all                    275  8.9    0.890    0.923    2.864    2.915
 dbcsr_complete_redistribute          5  7.6    1.443    1.484    2.783    2.912
 qs_energies_init_hamiltonians        1  3.0    0.001    0.001    2.902    2.902
 dbcsr_data_release               12724 10.6    2.346    2.887    2.346    2.887
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.425    2.569
 dbcsr_sort_data                    325 11.1    2.436    2.499    2.436    2.499
 dbcsr_new_transposed                 4  7.5    0.269    0.298    2.375    2.390
 dbcsr_frobenius_norm                74  6.6    2.056    2.132    2.188    2.230
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.213    2.214
 dbcsr_add_d                        103  6.2    0.000    0.000    2.132    2.207
 dbcsr_add_anytype                  103  7.2    0.860    0.892    2.132    2.207
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=108.367000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=6873.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: 3afb8bf709f96407a7e9894ade69f93aea585134
Summary: empty
Status: OK