=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: c17f64ee903975497a1c6c096a904aa8b2310d59


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1, Cray-FFTW 3.3.8.10,
#              COSMA 2.6.2, ELPA 2022.11.001, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.1.0, LIBVORI 220621, LIBXSMM 1.17, PLUMED 2.8.1,
#              SIRIUS 7.3.2, SPGLIB 1.16.2
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed by default.
#        Replace or adapt the "module add" commands below if needed.
#
# Author: Matthias Krack (12.01.2023)
#
# Bash bootstrap section (a single comment from the point of view of make).
# For make, the "# \" line below opens a comment that is continued by the
# trailing backslash of every following line up to "return". Bash ends the
# comment at the end of the "# \" line and runs the commands when this file is
# sourced. Keep the trailing backslash on every line and do not add "#"
# comments inside the section.
# Steps: refuse direct execution, select the GCC version (first argument,
# default 9.3.0), load and save the module set, run the toolchain installer,
# source the generated setup file and print the make commands to use.
# The installer is started with -j<maxtasks>; when maxtasks is unset or empty
# the output of nproc is used, so that no bare "-j" is passed. The section
# stops with a non-zero return if tools/toolchain cannot be entered.
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   cd tools/toolchain || { echo "ERROR: Cannot change to directory tools/toolchain from ${PWD}"; return 1; }; \
   ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j"${maxtasks:-$(nproc)}" --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed; \
   cd ../..; \
   printf "Sourcing %s ... " "${PWD}/tools/toolchain/install/setup"; \
   source "${PWD}/tools/toolchain/install/setup"; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   return

# Build options
# DO_CHECKS and USE_ACC are compared against "yes" by the conditionals below.
# Each USE_<package> variable holds the package version that appears in the
# install path; an empty value skips the matching "ifneq" section. USE_QUIP is
# commented out here, so it is not set by this file.
DO_CHECKS := no
USE_ACC := yes
USE_COSMA := 2.6.2
USE_ELPA := 2022.11.001
USE_LIBINT := 2.6.0
USE_LIBPEXSI := 1.2.0
USE_LIBVORI := 220621
USE_LIBXC := 6.1.0
USE_LIBXSMM := 1.17
USE_PLUMED := 2.8.1
#USE_QUIP := 0.9.10
USE_SIRIUS := 7.3.2
USE_SPGLIB := 1.16.2

# Versions of the packages referenced only inside the SIRIUS section
LIBVDWXC_VER := 0.4.0
SPFFT_VER := 1.0.6
SPLA_VER := 1.5.4
HDF5_VER := 1.12.0

# Versions of the packages referenced only inside the LIBPEXSI section
SCOTCH_VER := 6.0.0
SUPERLU_VER := 6.1.0

# LMAX is part of the LIBINT install path, MAX_CONTR becomes -D__MAX_CONTR
LMAX := 5
MAX_CONTR := 4

# GPU and offload selection
GPUVER := P100
OFFLOAD_TARGET := cuda

# Compiler, linker and archiver commands
CC := cc
CXX := CC
FC := ftn
LD := ftn
OFFLOAD_CC := nvcc
AR := ar -r

# No -march flag is given here: the cc, CC, and ftn wrappers already add the
# proper one.
CFLAGS := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g

# Root of the toolchain installation; all package paths below start from it.
INSTALL_PATH := $(PWD)/tools/toolchain/install

# Preprocessor defines that are always set
DFLAGS := -D__parallel -D__SCALAPACK -D__FFTW3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# Extra define requested with DO_CHECKS=yes
ifeq ($(DO_CHECKS), yes)
   DFLAGS += -D__CHECK_DIAG
endif

# Offload defines. __NO_OFFLOAD_PW is set because there is possibly no
# performance gain with PW_CUDA currently.
ifeq ($(USE_ACC), yes)
   DFLAGS += -D__DBCSR_ACC -D__OFFLOAD_CUDA -D__NO_OFFLOAD_PW
endif

# PLUMED: static library plus -D__PLUMED2. USE_GSL is set here as well, which
# enables the GSL section further down.
ifneq ($(USE_PLUMED),)
   USE_PLUMED := $(strip $(USE_PLUMED))
   USE_GSL := 2.7
   PLUMED_LIB := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   LIBS += $(PLUMED_LIB)/libplumed.a
   DFLAGS += -D__PLUMED2
endif

# ELPA: include and library paths are taken from the $(TARGET) subdirectory of
# the ELPA installation. TARGET is fixed to "nvidia" here, so the GPU define
# is always added together with -D__ELPA.
ifneq ($(USE_ELPA),)
   USE_ELPA := $(strip $(USE_ELPA))
   TARGET := nvidia
   ELPA_LIB := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   ELPA_INC := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   DFLAGS += -D__ELPA
   ifeq ($(TARGET), nvidia)
      DFLAGS += -D__ELPA_NVIDIA_GPU
   endif
   CFLAGS += $(addprefix -I$(ELPA_INC)/,elpa modules)
   LIBS += $(ELPA_LIB)/libelpa.a
endif

# QUIP: include path, -D__QUIP and the six static libraries, appended to LIBS
# in the order listed.
ifneq ($(USE_QUIP),)
   USE_QUIP := $(strip $(USE_QUIP))
   QUIP_LIB := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   QUIP_INC := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   DFLAGS += -D__QUIP
   CFLAGS += -I$(QUIP_INC)
   LIBS += $(addprefix $(QUIP_LIB)/,libquip_core.a libatoms.a libFoX_sax.a libFoX_common.a libFoX_utils.a libFoX_fsys.a)
endif

# LIBPEXSI together with SuperLU_DIST and SCOTCH. The libraries are appended
# to LIBS in the order PEXSI, SuperLU_DIST, SCOTCH.
ifneq ($(USE_LIBPEXSI),)
   USE_LIBPEXSI := $(strip $(USE_LIBPEXSI))
   SUPERLU_VER := $(strip $(SUPERLU_VER))
   SCOTCH_VER := $(strip $(SCOTCH_VER))

   LIBPEXSI_INC := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   LIBPEXSI_LIB := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
   SUPERLU_INC := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
   SUPERLU_LIB := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib
   SCOTCH_INC := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   SCOTCH_LIB := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib

   DFLAGS += -D__LIBPEXSI
   CFLAGS += -I$(LIBPEXSI_INC)
   CFLAGS += -I$(SCOTCH_INC)
   CFLAGS += -I$(SUPERLU_INC)
   LIBS += $(LIBPEXSI_LIB)/libpexsi.a
   LIBS += $(SUPERLU_LIB)/libsuperlu_dist.a
   LIBS += $(addprefix $(SCOTCH_LIB)/,libptscotchparmetis.a libptscotch.a libptscotcherr.a libscotchmetis.a libscotch.a)
endif

# LIBVORI: static library plus -D__LIBVORI (no include path is added).
ifneq ($(USE_LIBVORI),)
   USE_LIBVORI := $(strip $(USE_LIBVORI))
   LIBVORI_LIB := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   LIBS += $(LIBVORI_LIB)/libvori.a
   DFLAGS += -D__LIBVORI
endif

# LIBXC: include path, -D__LIBXC and the two static libraries, with
# libxcf03.a listed before libxc.a.
ifneq ($(USE_LIBXC),)
   USE_LIBXC := $(strip $(USE_LIBXC))
   LIBXC_LIB := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   LIBXC_INC := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   DFLAGS += -D__LIBXC
   CFLAGS += -I$(LIBXC_INC)
   LIBS += $(addprefix $(LIBXC_LIB)/,libxcf03.a libxc.a)
endif

# LIBINT: the install directory name contains both the LIBINT version and
# LMAX.
ifneq ($(USE_LIBINT),)
   LMAX := $(strip $(LMAX))
   USE_LIBINT := $(strip $(USE_LIBINT))
   LIBINT_LIB := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   LIBINT_INC := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   DFLAGS += -D__LIBINT
   CFLAGS += -I$(LIBINT_INC)
   LIBS += $(LIBINT_LIB)/libint2.a
endif

# SPGLIB: include path, -D__SPGLIB and libsymspg.a.
ifneq ($(USE_SPGLIB),)
   USE_SPGLIB := $(strip $(USE_SPGLIB))
   SPGLIB_LIB := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   SPGLIB_INC := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   DFLAGS += -D__SPGLIB
   CFLAGS += -I$(SPGLIB_INC)
   LIBS += $(SPGLIB_LIB)/libsymspg.a
endif

# LIBXSMM: include path, -D__LIBXSMM and the two static libraries, with
# libxsmmf.a listed before libxsmm.a.
ifneq ($(USE_LIBXSMM),)
   USE_LIBXSMM := $(strip $(USE_LIBXSMM))
   LIBXSMM_LIB := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   LIBXSMM_INC := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   DFLAGS += -D__LIBXSMM
   CFLAGS += -I$(LIBXSMM_INC)
   LIBS += $(addprefix $(LIBXSMM_LIB)/,libxsmmf.a libxsmm.a)
endif

# SIRIUS with its companion packages HDF5, libvdwxc, SpFFT and SpLA.
# With USE_ACC=yes the SpFFT/SpLA library directories and the SIRIUS include
# and library directories get a "cuda" subdirectory appended, and
# -D__OFFLOAD_GEMM is defined.
ifneq ($(USE_SIRIUS),)
   USE_SIRIUS := $(strip $(USE_SIRIUS))
   HDF5_VER := $(strip $(HDF5_VER))
   LIBVDWXC_VER := $(strip $(LIBVDWXC_VER))
   SPFFT_VER := $(strip $(SPFFT_VER))
   SPLA_VER := $(strip $(SPLA_VER))

   HDF5_LIB := $(INSTALL_PATH)/hdf5-$(HDF5_VER)/lib
   LIBVDWXC_INC := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SPFFT_INC := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
   SPLA_INC := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla

   # Paths of the non-accelerated build first ...
   SPFFT_LIB := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
   SPLA_LIB := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
   SIRIUS_INC := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
   SIRIUS_LIB := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
   # ... then redirected to the cuda subdirectories for the accelerated build
   ifeq ($(USE_ACC), yes)
      DFLAGS += -D__OFFLOAD_GEMM
      SPFFT_LIB := $(SPFFT_LIB)/cuda
      SPLA_LIB := $(SPLA_LIB)/cuda
      SIRIUS_INC := $(SIRIUS_INC)/cuda
      SIRIUS_LIB := $(SIRIUS_LIB)/cuda
   endif

   CFLAGS += -I$(LIBVDWXC_INC) -I$(SPFFT_INC) -I$(SPLA_INC) -I$(SIRIUS_INC)
   DFLAGS += -D__HDF5 -D__LIBVDWXC -D__SPFFT -D__SPLA -D__SIRIUS
   LIBS += $(SIRIUS_LIB)/libsirius.a
   LIBS += $(SPLA_LIB)/libspla.a
   LIBS += $(SPFFT_LIB)/libspfft.a
   LIBS += $(LIBVDWXC_LIB)/libvdwxc.a
   LIBS += $(HDF5_LIB)/libhdf5.a
endif

# COSMA: with USE_ACC=yes the suffix "-cuda" is appended to the version, which
# changes the install directory that is used. The five static libraries are
# appended to LIBS in the order listed.
ifneq ($(USE_COSMA),)
   USE_COSMA := $(strip $(USE_COSMA))
   ifeq ($(USE_ACC), yes)
      USE_COSMA := $(USE_COSMA)-cuda
   endif
   COSMA_LIB := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   COSMA_INC := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   DFLAGS += -D__COSMA
   CFLAGS += -I$(COSMA_INC)
   LIBS += $(addprefix $(COSMA_LIB)/,libcosma_prefixed_pxgemm.a libcosma.a libcosta_prefixed_scalapack.a libcosta.a libTiled-MM.a)
endif

# GSL: include path, -D__GSL and libgsl.a. USE_GSL is assigned inside the
# PLUMED section of this file.
ifneq ($(USE_GSL),)
   USE_GSL := $(strip $(USE_GSL))
   GSL_LIB := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   GSL_INC := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   DFLAGS += -D__GSL
   CFLAGS += -I$(GSL_INC)
   LIBS += $(GSL_LIB)/libgsl.a
endif

# Final flag assembly. The preprocessor defines are added to CFLAGS first, so
# the C++ and Fortran flags derived from CFLAGS carry them as well.
CFLAGS += $(DFLAGS)
CXXFLAGS := $(CFLAGS) -std=c++11

# Flags for the offload compiler. The architecture sm_60 is hard-coded here.
# NOTE(review): presumably this matches GPUVER=P100 - confirm when changing GPUVER.
OFFLOAD_FLAGS := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# Fortran flags. -fallow-argument-mismatch is added only when the major
# version printed by "gcc -dumpversion" is greater than 9.
FCFLAGS := $(CFLAGS)
ifeq ($(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes), yes)
   FCFLAGS += -fallow-argument-mismatch
endif
FCFLAGS += -fbacktrace -ffree-form -ffree-line-length-none -fno-omit-frame-pointer -std=f2008

# Linker flags start from the Fortran flags; the CUDA library directory is
# added as search path and rpath only when CUDA_HOME is set.
LDFLAGS := $(FCFLAGS)
ifneq ($(CUDA_HOME),)
   CUDA_LIB := $(CUDA_HOME)/lib64
   LDFLAGS += -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA libraries followed by the system libraries
LIBS += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/01
 job id: 44562181
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/02
 job id: 44562182
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/03
 job id: 44562183
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/04
 job id: 44562186
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/05
 job id: 44562187
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/06
 job id: 44562188
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/07
 job id: 44562190
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/08
 job id: 44562191
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/09
 job id: 44562192
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/10
 job id: 44562193
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/11
 job id: 44562194
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/12
 job id: 44562195
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/13
 job id: 44562196
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/14
 job id: 44562197
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/15
 job id: 44562198
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/16
 job id: 44562199
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/17
 job id: 44562200
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/18
 job id: 44562202
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/19
 job id: 44562204
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/20
 job id: 44562205
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/21
 job id: 44562209
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/22
 job id: 44562212
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/23
 job id: 44562214
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/24
 job id: 44562215
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/25
 job id: 44562216
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/26
 job id: 44562217
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          344                      9.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.054    0.066  137.142  137.143
 farming_run                          1  2.0  135.787  135.788  137.080  137.090
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.459618E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              228                1113141.
 MP_Allreduce          485                2282278.
 MP_Sync                27
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split           8
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.027  116.501  116.502
 qs_energies                          1  2.0    0.000    0.000  115.999  116.001
 mp2_main                             1  3.0    0.000    0.000  113.943  113.945
 mp2_gpw_main                         1  4.0    0.032    0.038  113.017  113.019
 mp2_ri_gpw_compute_in                1  5.0    0.204    0.205   93.804   94.330
 mp2_ri_gpw_compute_in_loop           1  6.0    0.004    0.004   55.479   56.007
 mp2_eri_3c_integrate_gpw           272  7.0    0.152    0.164   41.793   47.255
 get_2c_integrals                     1  6.0    0.000    0.003   37.412   38.120
 integrate_v_rspace                 273  8.0    0.433    0.448   25.157   30.346
 pw_transfer                       6555 10.6    0.375    0.398   27.423   27.947
 grid_integrate_task_list           273  9.0   20.968   26.648   20.968   26.648
 fft_wrap_pw1pw2                   5465 11.4    0.045    0.049   26.087   26.634
 fft_wrap_pw1pw2_100               2178 12.4    1.185    1.365   23.594   24.148
 compute_2c_integrals                 1  7.0    0.015    0.015   19.668   19.668
 rpa_ri_compute_en                    1  5.0    0.026    0.041   19.094   19.359
 compute_2c_integrals_loop_lm         1  8.0    0.021    0.022   18.904   19.337
 mp2_eri_2c_integrate_gpw             1  9.0    2.380    2.430   18.883   19.316
 cp_fm_cholesky_decompose            12  8.2   17.751   18.480   17.751   18.480
 cholesky_decomp                      1  7.0    0.000    0.000   16.592   17.326
 fft3d_s                           5443 13.4   16.156   16.637   16.178   16.659
 ao_to_mo_and_store_B_mult_1        272  7.0   10.860   15.606   10.860   15.606
 calculate_wavefunction             272  8.0    5.430    5.586   12.559   13.168
 rpa_num_int                          1  6.0    0.002    0.027   10.713   10.713
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.566   10.641
 calc_mat_Q                           8  8.0    0.000    0.000    9.356    9.480
 contract_S_to_Q                      8  9.0    0.000    0.000    8.780    8.937
 calc_potential_gpw                 544  9.5    0.005    0.006    8.248    8.605
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.368    8.545
 parallel_gemm_fm_cosma              14 10.1    8.368    8.545    8.368    8.545
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.002    8.214    8.489
 potential_pw2rs                    545 10.0    0.106    0.109    7.699    8.343
 collocate_single_gaussian          272 10.0    0.039    0.041    7.465    7.739
 create_integ_mat                     1  6.0    0.022    0.027    7.720    7.720
 array2fm                             1  7.0    0.000    0.000    6.770    7.151
 pw_scatter_s                      2720 13.7    4.436    4.645    4.436    4.645
 pw_gather_s                       2722 13.2    3.887    4.249    3.887    4.249
 array2fm_buffer_send                 1  8.0    2.987    3.129    2.987    3.129
 pw_poisson_solve                   545 10.5    1.119    1.179    2.161    2.361
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=113.016718, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2730.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          344                     10.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.030    0.038  405.251  405.252
 farming_run                          1  2.0  403.379  403.391  405.204  405.209
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827128576       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788821       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.224819E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              703                 408373.
 MP_Allreduce         1821                  23730.
 MP_Sync                38
 MP_Alltoall            77                2368424.
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.015    0.046  213.466  213.466
 qs_energies                          1  2.0    0.000    0.000  213.027  213.046
 scf_env_do_scf                       1  3.0    0.000    0.000  107.510  107.510
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  106.583  106.593
 rebuild_ks_matrix                    4  6.0    0.000    0.000  106.437  106.444
 qs_ks_build_kohn_sham_matrix         4  7.0    0.058    0.065  106.436  106.444
 hfx_ks_matrix                        4  8.0    0.001    0.001  106.031  106.035
 integrate_four_center                4  9.0    0.145    0.455  106.031  106.034
 mp2_main                             1  3.0    0.003    0.027  105.226  105.246
 mp2_gpw_main                         1  4.0    0.071    0.330  104.275  104.289
 integrate_four_center_main           4 10.0    0.084    0.476   96.750  100.651
 integrate_four_center_bin          264 11.0   96.666  100.633   96.666  100.633
 init_scf_loop                        1  4.0    0.000    0.000   92.954   92.954
 mp2_ri_gpw_compute_in                1  5.0    0.085    0.148   75.746   76.892
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.004   54.828   55.976
 mp2_eri_3c_integrate_gpw            91  7.0    0.145    0.165   42.519   47.467
 integrate_v_rspace                  95  8.0    0.396    0.562   28.891   33.661
 pw_transfer                       2240 10.6    0.145    0.161   29.875   30.226
 mp2_ri_gpw_compute_en                1  5.0    0.114    0.437   28.175   30.107
 fft_wrap_pw1pw2                   1868 11.4    0.018    0.020   28.909   29.281
 grid_integrate_task_list            95  9.0   24.163   29.121   24.163   29.121
 ao_to_mo_and_store_B_mult_1         91  7.0   10.553   29.010   10.553   29.010
 fft_wrap_pw1pw2_100                730 12.4    1.243    1.375   26.603   27.040
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.842    2.045   25.896   25.908
 get_2c_integrals                     1  6.0    0.006    0.049   20.791   20.866
 compute_2c_integrals                 1  7.0    0.009    0.024   19.767   19.808
 compute_2c_integrals_loop_lm         1  8.0    0.018    0.053   18.770   19.323
 mp2_eri_2c_integrate_gpw             1  9.0    1.773    1.984   18.752   19.270
 fft3d_s                           1823 13.4   18.421   18.847   18.435   18.860
 scf_env_do_scf_inner_loop            4  4.0    0.003    0.149   14.553   14.553
 calculate_wavefunction              91  8.0    2.025    2.062    9.754   10.024
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.558    0.588    8.917    9.948
 mp2_ri_gpw_compute_en_comm          22  7.0    0.495    0.515    8.743    9.449
 local_gemm                         172  8.0    8.359    9.397    8.359    9.397
 potential_pw2rs                    186 10.0    0.033    0.034    8.604    9.080
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.002    8.164    8.497
 calc_potential_gpw                 182  9.5    0.002    0.002    7.890    8.195
 collocate_single_gaussian           91 10.0    0.019    0.079    7.822    8.120
 mp_sendrecv_dm3                   2068  8.0    6.784    7.487    6.784    7.487
 mp_sync                             38 10.4    3.955    6.770    3.955    6.770
 mp2_ri_gpw_compute_en_ener         172  7.0    6.349    6.436    6.349    6.436
 pw_gather_s                        912 13.2    4.896    5.224    4.896    5.224
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=104.258629, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1512.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             452.747264E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62385.
 MP_Allreduce        10249                    271.
 MP_Sync               580
 MP_Alltoall          2083                 589622.
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.022    0.040   53.738   53.739
 qs_mol_dyn_low                       1  2.0    0.003    0.004   53.230   53.238
 qs_forces                           11  3.9    0.006    0.013   53.170   53.171
 qs_energies                         11  4.9    0.001    0.002   51.650   51.666
 scf_env_do_scf                      11  5.9    0.000    0.001   45.442   45.442
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   43.320   43.320
 dbcsr_multiply_generic            2286 12.5    0.095    0.098   34.447   35.032
 qs_scf_new_mos                     108  7.5    0.000    0.001   33.126   33.430
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   33.126   33.430
 ot_scf_mini                        108  9.5    0.002    0.002   31.441   31.657
 multiply_cannon                   2286 13.5    0.180    0.187   26.527   28.354
 multiply_cannon_loop              2286 14.5    1.489    1.558   25.846   27.674
 velocity_verlet                     10  3.0    0.001    0.001   26.589   26.590
 ot_mini                            108 10.5    0.001    0.001   20.086   20.340
 qs_ot_get_derivative               108 11.5    0.001    0.001   17.064   17.268
 mp_waitall_1                    245248 16.5    9.086   15.472    9.086   15.472
 multiply_cannon_metrocomm3       54864 15.5    0.068    0.074    6.398   13.830
 multiply_cannon_multrec          54864 15.5    4.164    6.382    7.683   11.151
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.978    8.155
 qs_ks_build_kohn_sham_matrix       119  9.3    0.016    0.035    7.978    8.154
 mp_sum_l                          7207 12.9    5.973    7.716    5.973    7.716
 multiply_cannon_sync_h2d         54864 15.5    5.823    7.481    5.823    7.481
 qs_ks_update_qs_env                119  7.6    0.001    0.001    7.018    7.177
 qs_ot_get_p                        119 10.4    0.001    0.001    6.691    7.030
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    5.912    6.322
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    5.758    5.877
 init_scf_run                        11  5.9    0.000    0.001    4.920    4.920
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    4.920    4.920
 dbcsr_mm_accdrv_process          76910 16.1    1.199    1.866    3.442    4.790
 sum_up_and_integrate               119 10.3    0.012    0.015    4.548    4.554
 integrate_v_rspace                 119 11.3    0.002    0.002    4.536    4.543
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.240    4.345
 calculate_rho_elec                 119  8.7    0.011    0.017    4.240    4.344
 multiply_cannon_metrocomm1       54864 15.5    0.051    0.056    2.018    3.849
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    3.695    3.756
 calculate_dm_sparse                119  9.5    0.000    0.000    3.105    3.229
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.863    3.103
 apply_single                       119 13.6    0.000    0.000    2.863    3.102
 rs_pw_transfer                     974 11.9    0.011    0.012    2.787    2.892
 jit_kernel_multiply                 13 15.8    2.181    2.868    2.181    2.868
 cp_dbcsr_syevd                      50 12.0    0.002    0.003    2.839    2.840
 ot_diis_step                       108 11.5    0.006    0.006    2.730    2.730
 calculate_first_density_matrix       1  7.0    0.001    0.001    2.702    2.718
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.641    2.642
 cp_fm_redistribute_end              50 14.0    2.405    2.620    2.410    2.622
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.520    2.602
 cp_fm_diag_elpa_base                50 14.0    0.210    2.536    0.211    2.545
 density_rs2pw                      119  9.7    0.004    0.004    2.253    2.382
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.327    2.329
 acc_transpose_blocks             54864 15.5    0.234    0.253    1.740    2.219
 grid_integrate_task_list           119 12.3    2.022    2.151    2.022    2.151
 wfi_extrapolate                     11  7.9    0.001    0.001    2.145    2.145
 init_scf_loop                       11  6.9    0.000    0.000    2.104    2.104
 mp_sum_d                          4129 12.0    1.419    2.103    1.419    2.103
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.018    2.066
 pw_transfer                       1439 11.6    0.053    0.059    1.836    1.915
 potential_pw2rs                    119 12.3    0.004    0.004    1.889    1.903
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.007    1.760    1.842
 make_m2s                          4572 13.5    0.053    0.056    1.630    1.675
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.588    1.619
 fft3d_ps                          1201 14.6    0.372    0.475    1.530    1.604
 make_images                       4572 14.5    0.132    0.138    1.548    1.592
 mp_waitany                       12084 13.8    1.295    1.503    1.295    1.503
 mp_alltoall_d11v                  2130 13.8    1.297    1.486    1.297    1.486
 fft_wrap_pw1pw2_140                487 13.2    0.083    0.096    1.353    1.434
 grid_collocate_task_list           119  9.7    1.286    1.374    1.286    1.374
 dbcsr_dot_sd                      1205 11.9    0.051    0.062    0.782    1.180
 acc_transpose_blocks_kernels     54864 16.5    0.254    0.386    0.742    1.102
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=53.739000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=431.272727, yerr=1.052349
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             487.940096E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62664.
 MP_Allreduce        10226                    305.
 MP_Sync               104
 MP_Alltoall          2060                1030216.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.042    0.097   40.591   40.617
 qs_mol_dyn_low                       1  2.0    0.003    0.003   39.193   40.318
 qs_forces                           11  3.9    0.002    0.002   39.071   39.071
 qs_energies                         11  4.9    0.001    0.001   37.386   37.389
 scf_env_do_scf                      11  5.9    0.000    0.001   31.937   31.937
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   29.417   29.417
 dbcsr_multiply_generic            2286 12.5    0.100    0.103   21.727   22.124
 qs_scf_new_mos                     108  7.5    0.001    0.001   20.070   20.320
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   20.070   20.319
 ot_scf_mini                        108  9.5    0.002    0.003   19.178   19.351
 velocity_verlet                     10  3.0    0.001    0.001   18.511   18.513
 multiply_cannon                   2286 13.5    0.207    0.216   16.661   18.221
 multiply_cannon_loop              2286 14.5    0.901    0.984   15.574   17.095
 ot_mini                            108 10.5    0.001    0.001   11.926   12.164
 mp_waitall_1                    200699 16.5    5.806   11.277    5.806   11.277
 multiply_cannon_metrocomm3       27432 15.5    0.067    0.070    4.327    9.794
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.470    9.650
 multiply_cannon_multrec          27432 15.5    1.955    4.444    6.006    9.047
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.309    7.450
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    7.309    7.450
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.473    6.600
 dbcsr_mm_accdrv_process          47894 16.0    3.192    5.332    3.982    5.923
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.706    4.579
 qs_ot_get_p                        119 10.4    0.001    0.001    4.352    4.577
 sum_up_and_integrate               119 10.3    0.024    0.027    4.261    4.274
 integrate_v_rspace                 119 11.3    0.002    0.002    4.236    4.250
 init_scf_run                        11  5.9    0.000    0.001    4.249    4.249
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    4.249    4.249
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.074    4.162
 apply_single                       119 13.6    0.000    0.000    3.074    4.161
 mp_sum_l                          7207 12.9    2.126    4.122    2.126    4.122
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.888    3.929
 calculate_rho_elec                 119  8.7    0.021    0.024    3.887    3.929
 rs_pw_transfer                     974 11.9    0.010    0.011    2.662    3.101
 qs_ot_p2m_diag                      50 11.0    0.008    0.012    2.917    2.936
 multiply_cannon_sync_h2d         27432 15.5    2.184    2.802    2.184    2.802
 calculate_first_density_matrix       1  7.0    0.001    0.001    2.721    2.722
 make_m2s                          4572 13.5    0.052    0.054    2.494    2.722
 density_rs2pw                      119  9.7    0.004    0.004    2.213    2.669
 make_images                       4572 14.5    0.199    0.235    2.405    2.632
 init_scf_loop                       11  6.9    0.000    0.000    2.498    2.498
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.476    2.477
 ot_diis_step                       108 11.5    0.011    0.011    2.403    2.404
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.142    2.233
 calculate_dm_sparse                119  9.5    0.000    0.001    2.108    2.183
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.138    2.139
 cp_fm_redistribute_end              50 14.0    1.771    2.115    1.775    2.117
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.098    2.100
 pw_transfer                       1439 11.6    0.065    0.072    2.029    2.078
 cp_fm_diag_elpa_base                50 14.0    0.327    2.000    0.340    2.059
 potential_pw2rs                    119 12.3    0.006    0.006    1.981    1.997
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.009    1.938    1.989
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.902    1.941
 grid_integrate_task_list           119 12.3    1.844    1.940    1.844    1.940
 jit_kernel_multiply                  9 16.2    0.738    1.931    0.738    1.931
 acc_transpose_blocks             27432 15.5    0.111    0.117    1.331    1.722
 fft3d_ps                          1201 14.6    0.513    0.567    1.639    1.686
 prepare_preconditioner              11  7.9    0.000    0.000    1.553    1.581
 make_preconditioner                 11  8.9    0.000    0.000    1.553    1.581
 fft_wrap_pw1pw2_140                487 13.2    0.080    0.086    1.513    1.563
 make_images_data                  4572 15.5    0.044    0.050    1.139    1.543
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.454    1.511
 wfi_extrapolate                     11  7.9    0.001    0.001    1.476    1.476
 hybrid_alltoall_any               4725 16.4    0.050    0.112    0.992    1.469
 grid_collocate_task_list           119  9.7    1.226    1.363    1.226    1.363
 mp_alltoall_d11v                  2130 13.8    1.181    1.357    1.181    1.357
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.324    1.330
 mp_allgather_i34                  2286 14.5    0.532    1.299    0.532    1.299
 md_write_output                     11  3.9    0.026    1.215    0.027    1.227
 md_output                           10  3.0    0.000    0.000    0.028    1.225
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.177    1.223
 mp_max_l                            33  2.8    1.126    1.150    1.126    1.150
 acc_transpose_blocks_kernels     27432 16.5    0.182    0.272    0.795    1.094
 mp_sum_d                          4129 12.0    0.585    1.007    0.585    1.007
 rs_pw_transfer_RS2PW_140           130 11.5    0.140    0.149    0.539    0.983
 mp_waitany                        5720 13.7    0.514    0.970    0.514    0.970
 qs_energies_init_hamiltonians       11  5.9    0.000    0.002    0.935    0.936
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.897    0.910
 mp_alltoall_z22v                  1201 16.6    0.726    0.824    0.726    0.824
 jit_kernel_transpose                 5 15.5    0.612    0.823    0.612    0.823
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=40.617000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=465.272727, yerr=1.542778
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             524.767232E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62660.
 MP_Allreduce        10225                    303.
 MP_Sync               104
 MP_Alltoall          1821                1999746.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_comm_split          50
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.092    0.209   34.385   34.401
 qs_mol_dyn_low                       1  2.0    0.005    0.019   33.844   33.887
 qs_forces                           11  3.9    0.011    0.073   33.685   33.720
 qs_energies                         11  4.9    0.004    0.014   32.045   32.079
 scf_env_do_scf                      11  5.9    0.002    0.011   25.875   25.876
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   23.210   23.210
 dbcsr_multiply_generic            2286 12.5    0.095    0.098   16.598   16.705
 velocity_verlet                     10  3.0    0.006    0.026   15.555   15.557
 qs_scf_new_mos                     108  7.5    0.001    0.001   14.832   14.865
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   14.832   14.864
 multiply_cannon                   2286 13.5    0.196    0.205   13.260   14.124
 ot_scf_mini                        108  9.5    0.003    0.004   14.100   14.118
 multiply_cannon_loop              2286 14.5    0.636    0.660   12.469   13.368
 ot_mini                            108 10.5    0.001    0.001    8.722    8.738
 multiply_cannon_multrec          18288 15.5    1.959    3.004    7.012    7.398
 qs_ot_get_derivative               108 11.5    0.001    0.002    7.212    7.230
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.497    6.519
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.015    6.497    6.518
 dbcsr_mm_accdrv_process          38222 16.0    4.242    5.591    4.969    5.819
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.744    5.764
 init_scf_run                        11  5.9    0.000    0.001    4.272    4.273
 scf_env_initial_rho_setup           11  6.9    0.000    0.002    4.272    4.273
 sum_up_and_integrate               119 10.3    0.029    0.030    4.043    4.047
 integrate_v_rspace                 119 11.3    0.002    0.003    4.013    4.021
 mp_waitall_1                    158411 16.6    2.670    3.727    2.670    3.727
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.562    3.569
 calculate_rho_elec                 119  8.7    0.030    0.031    3.561    3.568
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.696    3.335
 qs_ot_get_p                        119 10.4    0.001    0.001    3.221    3.249
 calculate_first_density_matrix       1  7.0    0.001    0.004    3.029    3.031
 init_scf_loop                       11  6.9    0.001    0.004    2.637    2.639
 rs_pw_transfer                     974 11.9    0.009    0.010    2.376    2.625
 multiply_cannon_metrocomm3       18288 15.5    0.044    0.045    1.442    2.418
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.057    2.368
 apply_single                       119 13.6    0.000    0.000    2.057    2.368
 density_rs2pw                      119  9.7    0.004    0.004    2.070    2.328
 qs_ot_p2m_diag                      50 11.0    0.012    0.012    2.138    2.143
 jit_kernel_multiply                 10 16.3    0.676    2.136    0.676    2.136
 pw_transfer                       1439 11.6    0.066    0.071    2.007    2.018
 make_m2s                          4572 13.5    0.044    0.045    1.813    1.938
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.009    1.914    1.927
 grid_integrate_task_list           119 12.3    1.807    1.917    1.807    1.917
 calculate_dm_sparse                119  9.5    0.000    0.000    1.877    1.893
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.876    1.878
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.877    1.877
 make_images                       4572 14.5    0.189    0.200    1.728    1.852
 potential_pw2rs                    119 12.3    0.007    0.008    1.790    1.796
 prepare_preconditioner              11  7.9    0.000    0.000    1.793    1.795
 make_preconditioner                 11  8.9    0.000    0.001    1.793    1.795
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.730    1.737
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.643    1.726
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.634    1.636
 cp_fm_redistribute_end              50 14.0    1.219    1.614    1.220    1.615
 fft3d_ps                          1201 14.6    0.524    0.544    1.591    1.607
 mp_sum_l                          7207 12.9    1.266    1.600    1.266    1.600
 cp_fm_diag_elpa_base                50 14.0    0.379    1.532    0.393    1.580
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.558    1.568
 multiply_cannon_sync_h2d         18288 15.5    1.364    1.553    1.364    1.553
 parallel_gemm_fm                    81  9.0    0.000    0.000    1.533    1.537
 parallel_gemm_fm_cosma              81 10.0    1.533    1.536    1.533    1.536
 fft_wrap_pw1pw2_140                487 13.2    0.091    0.095    1.523    1.536
 ot_diis_step                       108 11.5    0.013    0.024    1.485    1.494
 grid_collocate_task_list           119  9.7    1.214    1.367    1.214    1.367
 acc_transpose_blocks             18288 15.5    0.078    0.079    1.249    1.281
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.239    1.243
 wfi_extrapolate                     11  7.9    0.001    0.001    1.194    1.194
 qs_energies_init_hamiltonians       11  5.9    0.002    0.011    0.989    0.991
 multiply_cannon_metrocomm1       18288 15.5    0.028    0.029    0.368    0.985
 make_images_data                  4572 15.5    0.044    0.048    0.834    0.973
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.880    0.905
 hybrid_alltoall_any               4725 16.4    0.054    0.112    0.722    0.895
 qs_energies_compute_matrix_w        11  5.9    0.000    0.002    0.873    0.888
 calculate_w_matrix_ot               11  6.9    0.001    0.001    0.873    0.888
 make_basis_sm                       11  9.8    0.000    0.000    0.844    0.846
 acc_transpose_blocks_kernels     18288 16.5    0.212    0.219    0.805    0.830
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.824    0.826
 mp_alltoall_d11v                  2130 13.8    0.695    0.818    0.695    0.818
 mp_waitany                        9880 13.7    0.544    0.799    0.544    0.799
 rs_pw_transfer_RS2PW_140           130 11.5    0.120    0.123    0.538    0.796
 mp_alltoall_z22v                  1201 16.6    0.669    0.775    0.669    0.775
 cp_fm_cholesky_invert               11 10.9    0.756    0.760    0.756    0.760
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.657    0.720
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=34.401000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=497.454545, yerr=2.606413
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             559.620096E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62659.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_comm_split          50
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.023    0.044   36.415   36.416
 qs_mol_dyn_low                       1  2.0    0.003    0.003   36.117   36.125
 qs_forces                           11  3.9    0.002    0.002   36.056   36.057
 qs_energies                         11  4.9    0.001    0.001   34.306   34.310
 scf_env_do_scf                      11  5.9    0.000    0.001   29.024   29.025
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.006   25.492   25.492
 dbcsr_multiply_generic            2286 12.5    0.104    0.106   18.694   18.783
 velocity_verlet                     10  3.0    0.001    0.001   18.265   18.267
 qs_scf_new_mos                     108  7.5    0.001    0.001   16.776   16.828
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   16.775   16.827
 ot_scf_mini                        108  9.5    0.002    0.003   15.813   15.867
 multiply_cannon                   2286 13.5    0.224    0.268   14.995   15.440
 multiply_cannon_loop              2286 14.5    0.936    0.967   13.992   14.465
 ot_mini                            108 10.5    0.001    0.001    9.714    9.776
 multiply_cannon_multrec          27432 15.5    2.438    3.163    8.906    9.278
 dbcsr_mm_accdrv_process          47916 15.9    5.501    7.497    6.376    7.971
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.870    7.920
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.729    6.781
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.728    6.781
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.961    6.007
 sum_up_and_integrate               119 10.3    0.035    0.038    3.938    3.946
 init_scf_run                        11  5.9    0.000    0.001    3.935    3.936
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.935    3.936
 integrate_v_rspace                 119 11.3    0.002    0.003    3.903    3.911
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.687    3.718
 calculate_rho_elec                 119  8.7    0.040    0.046    3.687    3.718
 qs_ot_get_p                        119 10.4    0.001    0.001    3.448    3.524
 init_scf_loop                       11  6.9    0.000    0.000    3.511    3.511
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.017    3.321
 mp_waitall_1                    137007 16.6    2.075    2.731    2.075    2.731
 prepare_preconditioner              11  7.9    0.000    0.000    2.656    2.663
 make_preconditioner                 11  8.9    0.000    0.000    2.656    2.663
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.262    2.592
 calculate_first_density_matrix       1  7.0    0.009    0.025    2.505    2.507
 make_m2s                          4572 13.5    0.054    0.056    2.297    2.422
 rs_pw_transfer                     974 11.9    0.009    0.010    2.182    2.362
 make_images                       4572 14.5    0.270    0.329    2.189    2.313
 density_rs2pw                      119  9.7    0.004    0.004    2.100    2.287
 calculate_dm_sparse                119  9.5    0.000    0.000    2.137    2.192
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.963    2.181
 apply_single                       119 13.6    0.000    0.000    1.963    2.180
 pw_transfer                       1439 11.6    0.066    0.072    2.129    2.168
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    2.101    2.110
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.035    2.080
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.995    2.019
 jit_kernel_multiply                 10 15.9    0.815    2.010    0.815    2.010
 grid_integrate_task_list           119 12.3    1.815    1.907    1.815    1.907
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.880    1.882
 ot_diis_step                       108 11.5    0.012    0.013    1.803    1.803
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.779    1.780
 fft3d_ps                          1201 14.6    0.561    0.616    1.710    1.751
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.686    1.701
 potential_pw2rs                    119 12.3    0.008    0.009    1.692    1.697
 fft_wrap_pw1pw2_140                487 13.2    0.089    0.096    1.649    1.696
 multiply_cannon_metrocomm3       27432 15.5    0.038    0.038    0.919    1.605
 acc_transpose_blocks             27432 15.5    0.114    0.117    1.522    1.563
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.518    1.520
 cp_fm_redistribute_end              50 14.0    1.004    1.490    1.005    1.491
 cp_fm_diag_elpa_base                50 14.0    0.463    1.421    0.483    1.459
 wfi_extrapolate                     11  7.9    0.001    0.001    1.380    1.380
 grid_collocate_task_list           119  9.7    1.221    1.365    1.221    1.365
 mp_sum_l                          7207 12.9    0.969    1.322    0.969    1.322
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.301    1.309
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.137    1.154
 cp_fm_upper_to_full                 72 13.5    0.804    1.119    0.804    1.119
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    1.084    1.086
 dbcsr_complete_redistribute        329 12.2    0.118    0.146    0.802    1.079
 make_images_data                  4572 15.5    0.045    0.049    0.892    1.037
 multiply_cannon_sync_h2d         27432 15.5    0.983    1.021    0.983    1.021
 hybrid_alltoall_any               4725 16.4    0.061    0.150    0.759    0.948
 mp_alltoall_d11v                  2130 13.8    0.772    0.936    0.772    0.936
 acc_transpose_blocks_kernels     27432 16.5    0.269    0.277    0.892    0.919
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.792    0.871
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.859    0.863
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.591    0.858
 cp_fm_cholesky_invert               11 10.9    0.827    0.830    0.827    0.830
 mp_alltoall_z22v                  1201 16.6    0.731    0.767    0.731    0.767
 mp_alltoall_i22                    627 13.8    0.435    0.732    0.435    0.732
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=36.416000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=529.818182, yerr=3.712832
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             605.761536E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_comm_split          50
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.040    0.429   30.058   30.433
 qs_mol_dyn_low                       1  2.0    0.003    0.003   29.795   29.802
 qs_forces                           11  3.9    0.002    0.003   29.739   29.740
 qs_energies                         11  4.9    0.001    0.001   28.013   28.016
 scf_env_do_scf                      11  5.9    0.000    0.001   22.074   22.074
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.006   19.399   19.399
 velocity_verlet                     10  3.0    0.001    0.001   14.603   14.606
 dbcsr_multiply_generic            2286 12.5    0.124    0.199   12.987   13.062
 qs_scf_new_mos                     108  7.5    0.001    0.001   11.195   11.220
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   11.194   11.219
 multiply_cannon                   2286 13.5    0.230    0.238   10.228   10.706
 ot_scf_mini                        108  9.5    0.002    0.002   10.517   10.542
 multiply_cannon_loop              2286 14.5    0.329    0.342    9.278    9.471
 multiply_cannon_multrec           9144 15.5    1.711    1.960    6.340    6.585
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.209    6.229
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.208    6.229
 ot_mini                            108 10.5    0.001    0.001    5.903    5.932
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.543    5.561
 dbcsr_mm_accdrv_process          12550 15.8    3.203    4.549    4.528    4.640
 qs_ot_get_derivative               108 11.5    0.001    0.001    4.606    4.631
 init_scf_run                        11  5.9    0.000    0.001    4.505    4.505
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    4.504    4.504
 sum_up_and_integrate               119 10.3    0.038    0.040    3.802    3.816
 integrate_v_rspace                 119 11.3    0.003    0.003    3.764    3.779
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.658    3.672
 calculate_rho_elec                 119  8.7    0.060    0.061    3.657    3.672
 calculate_first_density_matrix       1  7.0    0.028    0.032    3.345    3.347
 qs_ot_get_p                        119 10.4    0.001    0.001    2.836    2.875
 init_scf_loop                       11  6.9    0.000    0.000    2.653    2.654
 pw_transfer                       1439 11.6    0.066    0.069    2.263    2.283
 jit_kernel_multiply                 10 15.8    1.287    2.257    1.287    2.257
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.169    2.190
 density_rs2pw                      119  9.7    0.004    0.004    1.975    2.121
 make_m2s                          4572 13.5    0.034    0.035    1.928    2.110
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.084    2.085
 mp_waitall_1                    115863 16.7    1.490    2.054    1.490    2.054
 make_images                       4572 14.5    0.267    0.302    1.838    2.018
 grid_integrate_task_list           119 12.3    1.840    1.941    1.840    1.941
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.930    1.938
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    1.902    1.905
 rs_pw_transfer                     974 11.9    0.008    0.009    1.771    1.898
 calculate_dm_sparse                119  9.5    0.000    0.000    1.830    1.849
 fft3d_ps                          1201 14.6    0.565    0.577    1.828    1.846
 fft_wrap_pw1pw2_140                487 13.2    0.088    0.091    1.797    1.818
 prepare_preconditioner              11  7.9    0.000    0.000    1.772    1.776
 make_preconditioner                 11  8.9    0.000    0.000    1.772    1.776
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.703    1.704
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.660    1.689
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.638    1.649
 potential_pw2rs                    119 12.3    0.010    0.011    1.536    1.549
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.416    1.417
 cp_fm_redistribute_end              50 14.0    0.707    1.395    0.708    1.395
 grid_collocate_task_list           119  9.7    1.268    1.386    1.268    1.386
 cp_fm_diag_elpa_base                50 14.0    0.642    1.322    0.685    1.378
 ot_diis_step                       108 11.5    0.013    0.013    1.280    1.280
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.246    1.252
 qs_energies_init_hamiltonians       11  5.9    0.001    0.003    1.225    1.226
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.161    1.187
 apply_single                       119 13.6    0.000    0.000    1.161    1.187
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.172    1.184
 wfi_extrapolate                     11  7.9    0.001    0.001    1.109    1.109
 hybrid_alltoall_any               4725 16.4    0.062    0.174    0.811    1.069
 make_images_data                  4572 15.5    0.038    0.042    0.846    1.051
 acc_transpose_blocks              9144 15.5    0.039    0.040    1.015    1.027
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.867    0.919
 cp_fm_cholesky_invert               11 10.9    0.911    0.914    0.911    0.914
 mp_alltoall_d11v                  2130 13.8    0.799    0.892    0.799    0.892
 multiply_cannon_sync_h2d          9144 15.5    0.708    0.808    0.708    0.808
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.788    0.790
 acc_transpose_blocks_kernels      9144 16.5    0.118    0.121    0.772    0.786
 mp_alltoall_z22v                  1201 16.6    0.709    0.774    0.709    0.774
 parallel_gemm_fm                    81  9.0    0.000    0.000    0.768    0.769
 parallel_gemm_fm_cosma              81 10.0    0.768    0.769    0.768    0.769
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.732    0.739
 make_basis_sm                       11  9.8    0.000    0.000    0.730    0.731
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    0.677    0.728
 mp_allgather_i34                  2286 14.5    0.260    0.698    0.260    0.698
 yz_to_x                            606 15.1    0.266    0.276    0.652    0.678
 jit_kernel_transpose                 5 15.6    0.654    0.670    0.654    0.670
 multiply_cannon_metrocomm3        9144 15.5    0.019    0.019    0.317    0.657
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=30.433000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=571.090909, yerr=7.537235
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             753.967104E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63729.
 MP_Allreduce        10074                    433.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.037   43.483   43.485
 qs_mol_dyn_low                       1  2.0    0.003    0.003   42.852   43.141
 qs_forces                           11  3.9    0.002    0.003   42.735   42.737
 qs_energies                         11  4.9    0.001    0.001   40.730   40.735
 scf_env_do_scf                      11  5.9    0.001    0.001   33.950   33.951
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   25.939   25.940
 velocity_verlet                     10  3.0    0.001    0.001   23.572   23.579
 dbcsr_multiply_generic            2286 12.5    0.198    0.210   18.187   18.431
 qs_scf_new_mos                     108  7.5    0.001    0.001   15.988   16.084
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   15.987   16.083
 multiply_cannon                   2286 13.5    0.301    0.308   14.174   15.054
 ot_scf_mini                        108  9.5    0.002    0.002   14.904   15.006
 multiply_cannon_loop              2286 14.5    0.344    0.349   12.870   13.773
 ot_mini                            108 10.5    0.001    0.001    8.927    9.046
 multiply_cannon_multrec           9144 15.5    3.409    4.837    8.853    9.037
 init_scf_loop                       11  6.9    0.000    0.000    7.985    7.988
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.544    7.688
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    7.543    7.687
 qs_ot_get_derivative               108 11.5    0.001    0.001    6.899    7.001
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.836    6.967
 prepare_preconditioner              11  7.9    0.000    0.000    6.886    6.900
 make_preconditioner                 11  8.9    0.000    0.000    6.885    6.900
 dbcsr_mm_accdrv_process          12550 15.8    4.321    5.704    5.322    6.785
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.462    6.780
 init_scf_run                        11  5.9    0.000    0.001    4.680    4.680
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    4.680    4.680
 cp_fm_upper_to_full                 72 14.2    3.151    4.517    3.151    4.517
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.319    4.329
 calculate_rho_elec                 119  8.7    0.118    0.121    4.318    4.328
 sum_up_and_integrate               119 10.3    0.065    0.067    4.085    4.092
 integrate_v_rspace                 119 11.3    0.003    0.003    4.020    4.026
 mp_waitall_1                     94719 16.7    2.535    3.507    2.535    3.507
 qs_ot_get_p                        119 10.4    0.001    0.001    3.225    3.357
 calculate_first_density_matrix       1  7.0    0.001    0.002    3.204    3.209
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.597    2.954
 pw_transfer                       1439 11.6    0.069    0.069    2.865    2.870
 fft_wrap_pw1pw2                   1201 12.6    0.009    0.009    2.766    2.772
 dbcsr_complete_redistribute        329 12.2    0.286    0.293    1.951    2.756
 make_m2s                          4572 13.5    0.038    0.038    2.367    2.556
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.649    2.440
 make_images                       4572 14.5    0.351    0.379    2.246    2.435
 fft3d_ps                          1201 14.6    0.597    0.605    2.388    2.393
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.347    2.348
 fft_wrap_pw1pw2_140                487 13.2    0.096    0.099    2.330    2.339
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.020    1.389    2.318
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.135    2.298
 apply_single                       119 13.6    0.000    0.000    2.135    2.297
 calculate_dm_sparse                119  9.5    0.000    0.000    2.275    2.294
 density_rs2pw                      119  9.7    0.004    0.004    2.237    2.256
 mp_alltoall_i22                    627 13.8    1.395    2.220    1.395    2.220
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.418    2.202
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.087    2.133
 grid_integrate_task_list           119 12.3    2.061    2.082    2.061    2.082
 ot_diis_step                       108 11.5    0.014    0.014    2.002    2.002
 qs_ot_p2m_diag                      50 11.0    0.042    0.043    1.954    1.955
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.824    1.826
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.689    1.739
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.681    1.681
 mp_sum_l                          7207 12.9    1.008    1.556    1.008    1.556
 cp_fm_cholesky_invert               11 10.9    1.534    1.537    1.534    1.537
 rs_pw_transfer                     974 11.9    0.009    0.009    1.496    1.528
 grid_collocate_task_list           119  9.7    1.493    1.508    1.493    1.508
 potential_pw2rs                    119 12.3    0.014    0.014    1.479    1.481
 hybrid_alltoall_any               4725 16.4    0.087    0.147    1.143    1.414
 wfi_extrapolate                     11  7.9    0.001    0.001    1.409    1.409
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.388    1.388
 cp_fm_diag_elpa_base                50 14.0    1.244    1.296    1.386    1.386
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.341    1.351
 make_images_data                  4572 15.5    0.043    0.046    1.090    1.327
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.153    1.178
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.147    1.169
 mp_alltoall_d11v                  2130 13.8    1.087    1.106    1.087    1.106
 jit_kernel_multiply                  5 15.3    0.974    1.080    0.974    1.080
 multiply_cannon_sync_h2d          9144 15.5    1.044    1.047    1.044    1.047
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    0.987    1.027
 acc_transpose_blocks              9144 15.5    0.039    0.039    1.013    1.018
 qs_create_task_list                 11  7.9    0.029    0.029    0.971    0.986
 yz_to_x                            606 15.1    0.460    0.471    0.961    0.971
 generate_qs_task_list               11  8.9    0.370    0.389    0.942    0.957
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.917    0.931
 mp_alltoall_z22v                  1201 16.6    0.838    0.881    0.838    0.881
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=43.485000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=708.636364, yerr=13.254658
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             502.099968E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65372.
 MP_Allreduce         9840                    486.
 MP_Sync               100
 MP_Alltoall          1938                1379060.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_comm_split          48
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.137    0.266   88.863   88.880
 qs_mol_dyn_low                       1  2.0    0.004    0.009   88.123   88.157
 qs_forces                           11  3.9    0.005    0.024   84.437   84.441
 qs_energies                         11  4.9    0.001    0.002   81.565   81.580
 scf_env_do_scf                      11  5.9    0.000    0.001   72.253   72.256
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   66.582   66.583
 dbcsr_multiply_generic            2055 12.4    0.108    0.113   52.965   53.340
 qs_scf_new_mos                      99  7.5    0.000    0.001   48.887   49.016
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   48.886   49.015
 ot_scf_mini                         99  9.5    0.002    0.003   46.492   46.602
 velocity_verlet                     10  3.0    0.003    0.016   46.276   46.314
 multiply_cannon                   2055 13.4    0.174    0.181   43.108   44.308
 multiply_cannon_loop              2055 14.4    1.545    1.583   42.130   43.401
 ot_mini                             99 10.5    0.001    0.002   28.199   28.323
 qs_ot_get_derivative                99 11.5    0.001    0.001   21.413   21.536
 multiply_cannon_multrec          49320 15.4   12.078   12.740   17.187   17.815
 rebuild_ks_matrix                  110  8.3    0.000    0.001   14.785   14.993
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.015   14.784   14.993
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.982   13.176
 mp_waitall_1                    220248 16.4   11.860   12.908   11.860   12.908
 multiply_cannon_sync_h2d         49320 15.4    9.877   10.474    9.877   10.474
 qs_ot_get_p                        110 10.4    0.001    0.001    9.778    9.920
 multiply_cannon_metrocomm3       49320 15.4    0.078    0.082    7.107    8.444
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    7.472    8.078
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.191    7.815
 apply_single                       110 13.6    0.000    0.001    7.191    7.815
 sum_up_and_integrate               110 10.3    0.037    0.043    7.254    7.269
 integrate_v_rspace                 110 11.3    0.003    0.004    7.217    7.241
 init_scf_run                        11  5.9    0.000    0.001    7.214    7.214
 scf_env_initial_rho_setup           11  6.9    0.000    0.002    7.214    7.214
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    6.875    6.958
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.790    6.932
 calculate_rho_elec                 110  8.6    0.021    0.026    6.790    6.931
 mp_sum_l                          6514 12.8    5.560    6.606    5.560    6.606
 ot_diis_step                        99 11.5    0.006    0.008    6.535    6.535
 qs_ot_p2m_diag                      48 11.0    0.012    0.018    6.477    6.496
 init_scf_loop                       11  6.9    0.000    0.000    5.641    5.642
 dbcsr_mm_accdrv_process          87628 16.1    2.118    2.216    4.987    5.417
 cp_dbcsr_syevd                      48 12.0    0.002    0.003    5.407    5.407
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    4.911    4.912
 cp_fm_redistribute_end              48 14.0    4.286    4.885    4.290    4.886
 cp_fm_diag_elpa_base                48 14.0    0.590    4.739    0.593    4.771
 rs_pw_transfer                     902 11.9    0.011    0.013    3.860    4.471
 density_rs2pw                      110  9.6    0.004    0.005    3.565    4.226
 wfi_extrapolate                     11  7.9    0.001    0.001    4.081    4.081
 make_m2s                          4110 13.4    0.060    0.065    3.967    4.074
 make_images                       4110 14.4    0.176    0.189    3.871    3.979
 calculate_dm_sparse                110  9.5    0.000    0.001    3.797    3.930
 multiply_cannon_metrocomm1       49320 15.4    0.059    0.064    2.781    3.913
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.868    3.872
 mp_sum_dm                          438  4.9    3.662    3.719    3.662    3.719
 md_write_output                     11  3.9    0.007    0.578    0.040    3.656
 md_output                           10  3.0    0.000    0.000    0.042    3.655
 update_particle_set                 20  4.0    0.000    0.000    3.602    3.641
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.542    3.629
 prepare_preconditioner              11  7.9    0.000    0.000    3.449    3.474
 make_preconditioner                 11  8.9    0.000    0.000    3.449    3.474
 pw_transfer                       1331 11.6    0.054    0.066    3.379    3.441
 grid_integrate_task_list           110 12.3    3.246    3.389    3.246    3.389
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.291    3.357
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    3.250    3.304
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.230    3.276
 write_trajectory                    44  4.9    0.032    3.065    0.033    3.078
 calculate_first_density_matrix       1  7.0    0.008    0.021    3.037    3.042
 fft3d_ps                          1111 14.6    0.784    0.883    2.818    2.877
 fft_wrap_pw1pw2_140                451 13.1    0.172    0.192    2.781    2.852
 potential_pw2rs                    110 12.3    0.006    0.007    2.781    2.809
 jit_kernel_multiply                 13 15.9    2.585    2.767    2.585    2.767
 acc_transpose_blocks             49320 15.4    0.229    0.242    2.566    2.652
 mp_alltoall_d11v                  2046 13.8    2.126    2.589    2.126    2.589
 mp_waitany                       14300 13.8    1.867    2.561    1.867    2.561
 grid_collocate_task_list           110  9.6    2.085    2.316    2.085    2.316
 mp_sum_d                          3883 11.9    1.493    1.981    1.493    1.981
 make_images_data                  4110 15.4    0.042    0.045    1.831    1.967
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.944    1.964
 cp_fm_cholesky_invert               11 10.9    1.920    1.924    1.920    1.924
 hybrid_alltoall_any               4261 16.3    0.081    0.482    1.589    1.845
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=88.880000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=477.000000, yerr=2.044949
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             589.877248E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65587.
 MP_Allreduce         9839                    562.
 MP_Sync               100
 MP_Alltoall          1717                2463018.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_comm_split          48
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.045    0.194   76.061   76.065
 qs_mol_dyn_low                       1  2.0    0.005    0.021   75.504   75.514
 qs_forces                           11  3.9    0.003    0.007   71.760   71.762
 qs_energies                         11  4.9    0.002    0.002   68.391   68.396
 scf_env_do_scf                      11  5.9    0.000    0.001   58.409   58.413
 scf_env_do_scf_inner_loop           99  6.5    0.005    0.022   50.528   50.529
 velocity_verlet                     10  3.0    0.001    0.001   40.463   40.541
 dbcsr_multiply_generic            2055 12.4    0.116    0.120   39.942   40.139
 multiply_cannon                   2055 13.4    0.221    0.242   33.041   34.450
 qs_scf_new_mos                      99  7.5    0.001    0.001   33.865   34.000
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   33.864   34.000
 multiply_cannon_loop              2055 14.4    0.930    0.956   31.713   32.789
 ot_scf_mini                         99  9.5    0.003    0.003   32.200   32.332
 ot_mini                             99 10.5    0.001    0.001   19.433   19.563
 multiply_cannon_multrec          24660 15.4    7.629    9.569   14.582   16.343
 rebuild_ks_matrix                  110  8.3    0.000    0.001   14.012   14.128
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.015   14.011   14.128
 qs_ot_get_derivative                99 11.5    0.001    0.001   13.561   13.683
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.335   12.429
 mp_waitall_1                    176588 16.5    8.862   11.349    8.862   11.349
 multiply_cannon_metrocomm3       24660 15.4    0.070    0.072    6.156    9.102
 multiply_cannon_sync_h2d         24660 15.4    7.019    8.115    7.019    8.115
 init_scf_loop                       11  6.9    0.000    0.000    7.844    7.845
 dbcsr_mm_accdrv_process          52282 16.1    5.071    6.059    6.794    7.547
 init_scf_run                        11  5.9    0.000    0.001    7.470    7.470
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    7.469    7.470
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.733    7.387
 apply_single                       110 13.6    0.000    0.001    6.732    7.387
 sum_up_and_integrate               110 10.3    0.051    0.058    6.787    6.798
 integrate_v_rspace                 110 11.3    0.002    0.003    6.736    6.749
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.363    6.372
 calculate_rho_elec                 110  8.6    0.039    0.047    6.362    6.372
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    5.400    6.277
 qs_ot_get_p                        110 10.4    0.001    0.001    6.110    6.273
 ot_diis_step                        99 11.5    0.010    0.010    5.824    5.824
 prepare_preconditioner              11  7.9    0.000    0.000    5.794    5.814
 make_preconditioner                 11  8.9    0.000    0.000    5.793    5.814
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.364    5.524
 make_m2s                          4110 13.4    0.057    0.060    4.282    4.771
 make_images                       4110 14.4    0.396    0.440    4.173    4.657
 qs_ot_p2m_diag                      48 11.0    0.028    0.044    4.215    4.238
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    4.163    4.165
 pw_transfer                       1331 11.6    0.066    0.073    3.862    4.018
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.009    3.755    3.914
 density_rs2pw                      110  9.6    0.004    0.004    3.430    3.875
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.790    3.850
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.786    3.786
 calculate_first_density_matrix       1  7.0    0.001    0.002    3.780    3.783
 mp_sum_dm                          438  4.9    3.683    3.781    3.683    3.781
 md_write_output                     11  3.9    0.078    3.696    0.079    3.715
 md_output                           10  3.0    0.000    0.000    0.081    3.715
 update_particle_set                 20  4.0    0.000    0.000    3.634    3.711
 rs_pw_transfer                     902 11.9    0.012    0.013    3.132    3.639
 wfi_extrapolate                     11  7.9    0.001    0.001    3.599    3.599
 grid_integrate_task_list           110 12.3    3.162    3.348    3.162    3.348
 fft_wrap_pw1pw2_140                451 13.1    0.203    0.220    3.168    3.327
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.194    3.263
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.252    3.254
 fft3d_ps                          1111 14.6    1.102    1.324    3.105    3.250
 calculate_dm_sparse                110  9.5    0.001    0.001    3.187    3.233
 cp_fm_redistribute_end              48 14.0    2.432    3.227    2.434    3.227
 cp_fm_diag_elpa_base                48 14.0    0.759    3.090    0.790    3.174
 mp_sum_l                          6514 12.8    2.132    2.932    2.132    2.932
 make_images_data                  4110 15.4    0.046    0.049    2.391    2.874
 hybrid_alltoall_any               4261 16.3    0.101    0.440    2.138    2.864
 cp_fm_cholesky_invert               11 10.9    2.748    2.755    2.748    2.755
 potential_pw2rs                    110 12.3    0.008    0.009    2.543    2.563
 grid_collocate_task_list           110  9.6    2.055    2.476    2.055    2.476
 jit_kernel_multiply                  8 16.3    1.374    2.446    1.374    2.446
 mp_alltoall_d11v                  2046 13.8    1.861    2.145    1.861    2.145
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.974    2.002
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.975    1.978
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.841    1.860
 mp_waitany                       10164 13.8    1.313    1.837    1.313    1.837
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.758    1.767
 mp_allgather_i34                  2055 14.4    0.657    1.733    0.657    1.733
 acc_transpose_blocks             24660 15.4    0.113    0.116    1.646    1.718
 multiply_cannon_metrocomm4       22605 15.4    0.073    0.077    0.772    1.706
 rs_pw_transfer_RS2PW_140           121 11.5    0.208    0.218    1.041    1.531
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=76.065000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=556.454545, yerr=8.326995
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             660.467712E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65578.
 MP_Allreduce         9838                    559.
 MP_Sync               100
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_comm_split          48
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.396    0.673   65.303   65.332
 qs_mol_dyn_low                       1  2.0    0.003    0.005   64.176   64.186
 qs_forces                           11  3.9    0.006    0.034   62.213   62.215
 qs_energies                         11  4.9    0.015    0.055   58.824   58.827
 scf_env_do_scf                      11  5.9    0.001    0.001   49.688   49.688
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   40.940   40.941
 velocity_verlet                     10  3.0    0.004    0.012   34.871   34.931
 dbcsr_multiply_generic            2055 12.4    0.110    0.114   29.913   30.186
 qs_scf_new_mos                      99  7.5    0.001    0.001   25.588   25.677
 qs_scf_loop_do_ot                   99  8.5    0.001    0.003   25.588   25.677
 multiply_cannon                   2055 13.4    0.211    0.220   23.436   24.498
 ot_scf_mini                         99  9.5    0.003    0.004   24.360   24.467
 multiply_cannon_loop              2055 14.4    0.614    0.633   22.209   23.260
 ot_mini                             99 10.5    0.001    0.001   14.217   14.327
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.717   12.861
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.019   12.716   12.861
 multiply_cannon_multrec          16440 15.4    3.829    5.036   10.467   11.682
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.106   11.239
 mp_waitall_1                    139946 16.5    7.496   10.571    7.496   10.571
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.688    9.794
 init_scf_loop                       11  6.9    0.001    0.003    8.711    8.715
 multiply_cannon_metrocomm3       16440 15.4    0.043    0.044    4.677    7.648
 prepare_preconditioner              11  7.9    0.000    0.000    6.882    6.903
 make_preconditioner                 11  8.9    0.000    0.001    6.882    6.903
 sum_up_and_integrate               110 10.3    0.059    0.060    6.787    6.818
 integrate_v_rspace                 110 11.3    0.003    0.003    6.728    6.758
 dbcsr_mm_accdrv_process          34862 16.1    5.286    6.249    6.492    6.650
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.211    6.584
 init_scf_run                        11  5.9    0.001    0.003    6.443    6.444
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    6.443    6.444
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.125    6.135
 calculate_rho_elec                 110  8.6    0.058    0.059    6.124    6.134
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    5.161    5.682
 apply_single                       110 13.6    0.000    0.000    5.161    5.682
 qs_ot_get_p                        110 10.4    0.001    0.001    5.358    5.508
 make_m2s                          4110 13.4    0.050    0.051    4.202    4.560
 ot_diis_step                        99 11.5    0.011    0.011    4.500    4.500
 density_rs2pw                      110  9.6    0.004    0.004    3.222    4.456
 make_images                       4110 14.4    0.390    0.508    4.086    4.444
 multiply_cannon_sync_h2d         16440 15.4    3.659    4.222    3.659    4.222
 rs_pw_transfer                     902 11.9    0.010    0.011    2.795    4.013
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.234    3.995
 pw_transfer                       1331 11.6    0.066    0.074    3.921    3.944
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.814    3.838
 qs_ot_p2m_diag                      48 11.0    0.042    0.044    3.688    3.694
 grid_integrate_task_list           110 12.3    3.196    3.390    3.196    3.390
 calculate_first_density_matrix       1  7.0    0.012    0.051    3.346    3.348
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.345    3.347
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.335    3.335
 fft_wrap_pw1pw2_140                451 13.1    0.213    0.217    3.293    3.319
 fft3d_ps                          1111 14.6    1.088    1.098    3.111    3.137
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.025    3.068
 wfi_extrapolate                     11  7.9    0.001    0.001    3.010    3.010
 make_images_data                  4110 15.4    0.043    0.047    2.447    2.934
 calculate_dm_sparse                110  9.5    0.001    0.001    2.781    2.815
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.786    2.788
 cp_fm_cholesky_invert               11 10.9    2.759    2.765    2.759    2.765
 cp_fm_redistribute_end              48 14.0    1.740    2.759    1.741    2.760
 hybrid_alltoall_any               4261 16.3    0.105    0.373    2.164    2.748
 cp_fm_diag_elpa_base                48 14.0    0.955    2.605    1.013    2.708
 jit_kernel_multiply                 10 16.4    0.817    2.685    0.817    2.685
 mp_sum_l                          6514 12.8    1.795    2.531    1.795    2.531
 potential_pw2rs                    110 12.3    0.011    0.011    2.487    2.514
 mp_waitany                       17072 13.8    1.221    2.505    1.221    2.505
 grid_collocate_task_list           110  9.6    2.084    2.477    2.084    2.477
 multiply_cannon_metrocomm4       14385 15.4    0.045    0.049    0.900    2.463
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.374    2.428
 mp_irecv_dv                      48980 15.7    0.829    2.336    0.829    2.336
 mp_alltoall_d11v                  2046 13.8    1.756    2.182    1.756    2.182
 rs_pw_transfer_RS2PW_140           121 11.5    0.179    0.200    0.932    2.151
 qs_energies_init_hamiltonians       11  5.9    0.001    0.003    2.056    2.057
 mp_sum_dm                          438  4.9    1.953    2.039    1.953    2.039
 update_particle_set                 20  4.0    0.000    0.000    1.880    1.943
 md_write_output                     11  3.9    0.061    1.923    0.062    1.942
 md_output                           10  3.0    0.000    0.000    0.064    1.942
 dbcsr_complete_redistribute        325 12.2    0.317    0.362    1.475    1.939
 cp_fm_upper_to_full                 70 13.6    1.401    1.847    1.401    1.847
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.811    1.828
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.735    1.746
 mp_allgather_i34                  2055 14.4    0.542    1.635    0.542    1.635
 cp_fm_cholesky_decompose            22 10.9    1.602    1.621    1.602    1.621
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.355    1.497
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    0.967    1.424
 rs_gather_matrices                 110 12.3    0.235    0.260    0.966    1.324
 acc_transpose_blocks             16440 15.4    0.074    0.075    1.288    1.318
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=65.332000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=625.363636, yerr=8.369563
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             729.006080E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65576.
 MP_Allreduce         9838                    600.
 MP_Sync               100
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_comm_split          48
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.176    0.765   78.518   78.568
 qs_mol_dyn_low                       1  2.0    0.003    0.003   76.333   76.348
 qs_forces                           11  3.9    0.007    0.010   73.825   73.826
 qs_energies                         11  4.9    0.038    0.291   70.377   70.385
 scf_env_do_scf                      11  5.9    0.004    0.029   56.326   56.328
 scf_env_do_scf_inner_loop           99  6.5    0.021    0.045   43.040   43.044
 velocity_verlet                     10  3.0    0.001    0.001   39.827   39.930
 dbcsr_multiply_generic            2055 12.4    0.176    0.184   33.649   33.878
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.861   27.965
 qs_scf_loop_do_ot                   99  8.5    0.006    0.040   27.860   27.964
 multiply_cannon                   2055 13.4    0.241    0.260   26.358   27.557
 ot_scf_mini                         99  9.5    0.004    0.012   26.219   26.334
 multiply_cannon_loop              2055 14.4    0.882    0.901   24.958   25.562
 ot_mini                             99 10.5    0.001    0.001   15.538   15.677
 multiply_cannon_multrec          24660 15.4    4.229    7.021   13.815   15.317
 init_scf_loop                       11  6.9    0.029    0.234   13.241   13.242
 rebuild_ks_matrix                  110  8.3    0.000    0.000   13.110   13.215
 qs_ks_build_kohn_sham_matrix       110  9.3    0.014    0.025   13.110   13.215
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.728   11.827
 qs_ot_get_derivative                99 11.5    0.001    0.001   11.318   11.439
 dbcsr_mm_accdrv_process          52304 16.0    7.418    8.969    9.439   10.726
 prepare_preconditioner              11  7.9    0.000    0.002   10.475   10.507
 make_preconditioner                 11  8.9    0.024    0.191   10.475   10.507
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.644   10.164
 init_scf_run                        11  5.9    0.000    0.002    9.608    9.609
 scf_env_initial_rho_setup           11  6.9    0.001    0.004    9.608    9.609
 mp_waitall_1                    121746 16.5    4.901    6.986    4.901    6.986
 sum_up_and_integrate               110 10.3    0.066    0.069    6.570    6.596
 integrate_v_rspace                 110 11.3    0.003    0.003    6.504    6.531
 calculate_first_density_matrix       1  7.0    0.008    0.059    6.386    6.389
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.305    6.317
 calculate_rho_elec                 110  8.6    0.077    0.081    6.304    6.316
 qs_ot_get_p                        110 10.4    0.001    0.001    5.688    5.839
 make_m2s                          4110 13.4    0.060    0.061    5.478    5.805
 make_images                       4110 14.4    0.573    0.694    5.337    5.659
 calculate_dm_sparse                110  9.5    0.001    0.001    5.397    5.426
 cp_fm_upper_to_full                 70 13.8    3.305    4.714    3.305    4.714
 rs_pw_transfer                     902 11.9    0.010    0.011    3.521    4.180
 ot_diis_step                        99 11.5    0.011    0.012    4.176    4.176
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.051    4.140
 apply_single                       110 13.6    0.000    0.000    4.051    4.140
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.992    4.067
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    4.000    4.014
 multiply_cannon_metrocomm3       24660 15.4    0.035    0.036    1.815    4.005
 acc_transpose_blocks             24660 15.4    0.108    0.110    3.667    3.922
 pw_transfer                       1331 11.6    0.065    0.075    3.863    3.905
 dbcsr_complete_redistribute        325 12.2    0.420    0.462    2.744    3.892
 qs_ot_p2m_diag                      48 11.0    0.054    0.063    3.835    3.849
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.757    3.803
 density_rs2pw                      110  9.6    0.004    0.004    3.221    3.744
 qs_energies_init_hamiltonians       11  5.9    0.003    0.020    3.509    3.515
 grid_integrate_task_list           110 12.3    3.259    3.436    3.259    3.436
 multiply_cannon_sync_h2d         24660 15.4    3.176    3.388    3.176    3.388
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.353    3.354
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    2.204    3.340
 acc_transpose_blocks_kernels     24660 16.4    0.304    0.311    3.019    3.271
 fft_wrap_pw1pw2_140                451 13.1    0.209    0.221    3.212    3.259
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.194    3.196
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.103    3.167
 fft3d_ps                          1111 14.6    1.092    1.131    3.044    3.093
 jit_kernel_multiply                 10 15.6    1.690    3.078    1.690    3.078
 make_images_data                  4110 15.4    0.046    0.050    2.693    3.027
 wfi_extrapolate                     11  7.9    0.001    0.001    3.008    3.008
 jit_kernel_transpose                 5 15.6    2.715    2.965    2.715    2.965
 hybrid_alltoall_any               4261 16.3    0.119    0.459    2.278    2.959
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.798    2.916
 mp_alltoall_i22                    605 13.7    1.722    2.915    1.722    2.915
 cp_fm_cholesky_invert               11 10.9    2.896    2.906    2.896    2.906
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.833    2.868
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.797    2.799
 cp_fm_redistribute_end              48 14.0    1.397    2.764    1.399    2.765
 cp_fm_diag_elpa_base                48 14.0    1.283    2.627    1.362    2.734
 mp_waitany                       13376 13.8    2.115    2.725    2.115    2.725
 mp_sum_d                          3881 11.9    2.120    2.717    2.120    2.717
 mp_sum_dm                          438  4.9    2.432    2.550    2.432    2.550
 rs_pw_transfer_RS2PW_140           121 11.5    0.173    0.193    1.846    2.512
 md_write_output                     11  3.9    0.095    2.276    0.105    2.497
 update_particle_set                 20  4.0    0.000    0.000    2.385    2.489
 grid_collocate_task_list           110  9.6    2.196    2.437    2.196    2.437
 qs_env_update_s_mstruct             11  6.9    0.001    0.004    2.293    2.425
 potential_pw2rs                    110 12.3    0.012    0.013    2.262    2.284
 mp_alltoall_d11v                  2046 13.8    1.736    2.181    1.736    2.181
 md_output                           10  3.0    0.000    0.000    0.083    1.926
 mp_sum_l                          6514 12.8    1.231    1.828    1.231    1.828
 cp_fm_cholesky_decompose            22 10.9    1.725    1.781    1.725    1.781
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    1.726    1.755
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.597    1.709
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.679    1.691
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=78.568000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=691.272727, yerr=7.299949
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             833.454080E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65574.
 MP_Allreduce         9838                    640.
 MP_Sync               100
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_comm_split          48
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.044    0.054   62.548   62.548
 qs_mol_dyn_low                       1  2.0    0.003    0.003   61.238   61.376
 qs_forces                           11  3.9    0.002    0.003   59.846   59.846
 qs_energies                         11  4.9    0.004    0.005   56.125   56.129
 scf_env_do_scf                      11  5.9    0.000    0.001   46.102   46.102
 scf_env_do_scf_inner_loop           99  6.5    0.020    0.025   38.332   38.332
 velocity_verlet                     10  3.0    0.001    0.001   32.596   32.682
 dbcsr_multiply_generic            2055 12.4    0.119    0.120   24.832   24.990
 qs_scf_new_mos                      99  7.5    0.001    0.001   21.097   21.158
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   21.096   21.157
 multiply_cannon                   2055 13.4    0.243    0.255   19.245   20.783
 ot_scf_mini                         99  9.5    0.002    0.002   19.862   19.909
 multiply_cannon_loop              2055 14.4    0.321    0.333   17.871   18.295
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.101   12.138
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   12.101   12.138
 ot_mini                             99 10.5    0.001    0.001   11.083   11.124
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.751   10.781
 multiply_cannon_multrec           8220 15.4    3.270    4.690    8.664   10.145
 mp_waitall_1                    103326 16.6    6.047    8.034    6.047    8.034
 init_scf_loop                       11  6.9    0.000    0.000    7.723    7.723
 qs_ot_get_derivative                99 11.5    0.001    0.001    7.301    7.347
 init_scf_run                        11  5.9    0.000    0.001    6.815    6.815
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    6.815    6.815
 sum_up_and_integrate               110 10.3    0.079    0.081    6.630    6.642
 integrate_v_rspace                 110 11.3    0.003    0.003    6.551    6.564
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.448    6.460
 calculate_rho_elec                 110  8.6    0.116    0.116    6.447    6.459
 dbcsr_mm_accdrv_process          17442 15.9    3.191    4.675    5.265    6.188
 prepare_preconditioner              11  7.9    0.000    0.000    6.035    6.040
 make_preconditioner                 11  8.9    0.000    0.000    6.035    6.040
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.608    5.683
 qs_ot_get_p                        110 10.4    0.001    0.001    4.864    4.894
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.018    2.993    4.628
 make_m2s                          4110 13.4    0.038    0.040    4.260    4.552
 make_images                       4110 14.4    0.635    0.688    4.130    4.421
 pw_transfer                       1331 11.6    0.066    0.072    4.203    4.213
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.753    4.132
 apply_single                       110 13.6    0.000    0.000    3.753    4.131
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    4.095    4.110
 calculate_first_density_matrix       1  7.0    0.008    0.008    3.959    3.960
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.788    3.789
 ot_diis_step                        99 11.5    0.012    0.012    3.760    3.760
 grid_integrate_task_list           110 12.3    3.359    3.563    3.359    3.563
 fft_wrap_pw1pw2_140                451 13.1    0.217    0.221    3.541    3.559
 density_rs2pw                      110  9.6    0.004    0.004    3.178    3.555
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.522    3.526
 fft3d_ps                          1111 14.6    1.147    1.171    3.336    3.350
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.289    3.308
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.218    3.218
 multiply_cannon_sync_h2d          8220 15.4    2.908    3.049    2.908    3.049
 cp_fm_cholesky_invert               11 10.9    2.920    2.924    2.920    2.924
 make_images_data                  4110 15.4    0.038    0.043    2.421    2.869
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.727    2.840
 wfi_extrapolate                     11  7.9    0.001    0.001    2.758    2.759
 hybrid_alltoall_any               4261 16.3    0.199    0.860    2.323    2.755
 qs_energies_init_hamiltonians       11  5.9    0.002    0.004    2.730    2.730
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.710    2.711
 calculate_dm_sparse                110  9.5    0.001    0.001    2.657    2.694
 cp_fm_redistribute_end              48 14.0    0.689    2.683    0.693    2.684
 cp_fm_diag_elpa_base                48 14.0    1.808    2.491    1.983    2.655
 rs_pw_transfer                     902 11.9    0.010    0.011    2.239    2.620
 jit_kernel_multiply                  9 15.8    1.766    2.568    1.766    2.568
 grid_collocate_task_list           110  9.6    2.278    2.538    2.278    2.538
 potential_pw2rs                    110 12.3    0.015    0.015    2.172    2.181
 mp_bcast_b                        1707 13.7    1.834    2.089    1.834    2.089
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.773    2.000
 external_control                   109  7.1    0.117    1.869    1.907    1.996
 mp_alltoall_d11v                  2046 13.8    1.622    1.937    1.622    1.937
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    1.771    1.788
 dbcsr_complete_redistribute        325 12.2    0.654    0.725    1.613    1.694
 acc_transpose_blocks              8220 15.4    0.036    0.037    1.623    1.693
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.687    1.693
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.563    1.691
 mp_allgather_i34                  2055 14.4    0.556    1.678    0.556    1.678
 cp_fm_cholesky_decompose            22 10.9    1.664    1.675    1.664    1.675
 multiply_cannon_metrocomm1        8220 15.4    0.020    0.021    0.781    1.644
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.627    1.639
 md_output                           10  3.0    0.000    0.000    0.097    1.509
 md_write_output                     11  3.9    0.093    1.479    0.096    1.508
 mp_waitany                        9240 13.8    1.082    1.490    1.082    1.490
 acc_transpose_blocks_kernels      8220 16.4    0.110    0.113    1.390    1.455
 mp_sum_dm                          438  4.9    1.307    1.398    1.307    1.398
 update_particle_set                 20  4.0    0.000    0.000    1.281    1.367
 qs_create_task_list                 11  7.9    0.012    0.012    1.244    1.350
 jit_kernel_transpose                 5 15.6    1.280    1.344    1.280    1.344
 generate_qs_task_list               11  8.9    0.378    0.446    1.232    1.338
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=62.548000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=783.454545, yerr=10.940863
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.369997E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67104.
 MP_Allreduce         9672                    819.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.055    0.118   92.974   92.994
 qs_mol_dyn_low                       1  2.0    0.003    0.003   91.863   91.872
 qs_forces                           11  3.9    0.015    0.029   91.798   91.799
 qs_energies                         11  4.9    0.008    0.008   87.555   87.569
 scf_env_do_scf                      11  5.9    0.001    0.001   75.919   75.919
 velocity_verlet                     10  3.0    0.001    0.001   56.592   56.598
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   47.115   47.117
 dbcsr_multiply_generic            2055 12.4    0.150    0.204   32.191   32.271
 init_scf_loop                       11  6.9    0.000    0.000   28.731   28.734
 qs_scf_new_mos                      99  7.5    0.001    0.001   28.584   28.662
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   28.583   28.662
 ot_scf_mini                         99  9.5    0.002    0.002   26.830   26.887
 prepare_preconditioner              11  7.9    0.000    0.000   26.660   26.670
 make_preconditioner                 11  8.9    0.000    0.000   26.660   26.670
 make_full_inverse_cholesky          11  9.9    0.000    0.000   20.700   26.136
 multiply_cannon                   2055 13.4    0.343    0.367   25.125   25.801
 multiply_cannon_loop              2055 14.4    0.342    0.346   23.308   23.955
 cp_fm_upper_to_full                 70 14.2   12.788   18.474   12.788   18.474
 ot_mini                             99 10.5    0.001    0.001   16.033   16.097
 rebuild_ks_matrix                  110  8.3    0.001    0.001   14.340   14.415
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.013   14.339   14.414
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.989   13.055
 multiply_cannon_multrec           8220 15.4    4.577    4.860   11.238   12.490
 qs_ot_get_derivative                99 11.5    0.001    0.001   11.377   11.440
 dbcsr_complete_redistribute        325 12.2    1.030    1.066    7.542   10.904
 mp_waitall_1                     84994 16.7    8.709   10.819    8.709   10.819
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.520    9.887
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    5.944    9.285
 mp_alltoall_i22                    605 13.7    5.575    8.921    5.575    8.921
 dbcsr_mm_accdrv_process          11614 15.7    4.202    5.830    6.518    7.930
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.831    7.866
 calculate_rho_elec                 110  8.6    0.227    0.227    7.830    7.866
 sum_up_and_integrate               110 10.3    0.150    0.150    7.449    7.461
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.018    5.936    7.321
 integrate_v_rspace                 110 11.3    0.004    0.004    7.299    7.312
 init_scf_run                        11  5.9    0.000    0.001    7.184    7.185
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    7.184    7.184
 make_m2s                          4110 13.4    0.043    0.044    5.293    5.811
 qs_ot_get_p                        110 10.4    0.001    0.001    5.586    5.675
 make_images                       4110 14.4    0.875    0.929    5.104    5.621
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    4.994    5.427
 pw_transfer                       1331 11.6    0.075    0.076    5.370    5.375
 cp_fm_cholesky_invert               11 10.9    5.348    5.353    5.348    5.353
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.727    5.333
 apply_single                       110 13.6    0.000    0.000    4.727    5.333
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.010    5.253    5.258
 ot_diis_step                        99 11.5    0.015    0.015    4.637    4.638
 fft_wrap_pw1pw2_140                451 13.1    0.230    0.233    4.574    4.584
 fft3d_ps                          1111 14.6    1.296    1.304    4.439    4.448
 qs_ot_p2m_diag                      48 11.0    0.151    0.155    4.025    4.032
 multiply_cannon_sync_h2d          8220 15.4    3.948    3.960    3.948    3.960
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    3.928    3.931
 density_rs2pw                      110  9.6    0.004    0.004    3.851    3.880
 qs_energies_init_hamiltonians       11  5.9    0.005    0.007    3.752    3.765
 calculate_first_density_matrix       1  7.0    0.001    0.001    3.724    3.724
 grid_integrate_task_list           110 12.3    3.670    3.709    3.670    3.709
 hybrid_alltoall_any               4261 16.3    0.255    0.553    2.906    3.661
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.587    3.587
 make_images_data                  4110 15.4    0.041    0.044    2.875    3.528
 acc_transpose_blocks              8220 15.4    0.035    0.036    1.670    3.525
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.406    3.425
 wfi_extrapolate                     11  7.9    0.001    0.001    3.336    3.336
 calculate_dm_sparse                110  9.5    0.001    0.001    3.284    3.305
 acc_transpose_blocks_kernels      8220 16.4    0.110    0.111    1.432    3.285
 jit_kernel_transpose                 5 15.6    1.322    3.176    1.322    3.176
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.988    2.988
 cp_fm_diag_elpa_base                48 14.0    2.449    2.644    2.986    2.986
 jit_kernel_multiply                 10 15.2    2.116    2.705    2.116    2.705
 grid_collocate_task_list           110  9.6    2.640    2.690    2.640    2.690
 potential_pw2rs                    110 12.3    0.021    0.021    2.544    2.551
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.389    2.427
 rs_pw_transfer                     902 11.9    0.011    0.011    2.324    2.375
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    2.276    2.356
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.104    2.226
 cp_fm_cholesky_decompose            22 10.9    2.029    2.054    2.029    2.054
 mp_alltoall_d11v                  2046 13.8    1.985    2.045    1.985    2.045
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.955    1.963
 qs_create_task_list                 11  7.9    0.001    0.002    1.910    1.961
 generate_qs_task_list               11  8.9    0.735    0.788    1.909    1.959
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=92.994000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1211.363636, yerr=67.067500
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             632.057856E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175955383376
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4085                  56760.
 MP_Allreduce        11253                    785.
 MP_Sync               170
 MP_Alltoall          2226                2520935.
 MP_SendRecv         24320                  18752.
 MP_ISendRecv        24320                  18752.
 MP_Wait             42476
 MP_comm_split          83
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.091    0.336  208.779  208.793
 qs_mol_dyn_low                       1  2.0    0.004    0.009  207.101  207.114
 qs_forces                           11  3.9    0.009    0.050  206.130  206.132
 qs_energies                         11  4.9    0.002    0.004  200.541  200.557
 scf_env_do_scf                      11  5.9    0.001    0.001  182.096  182.099
 scf_env_do_scf_inner_loop          117  6.6    0.006    0.034  160.840  160.842
 dbcsr_multiply_generic            2507 12.6    0.216    0.219  124.826  125.303
 velocity_verlet                     10  3.0    0.001    0.001  123.907  123.918
 qs_scf_new_mos                     117  7.6    0.001    0.001  120.270  120.523
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  120.269  120.523
 ot_scf_mini                        117  9.6    0.003    0.003  113.700  113.940
 multiply_cannon                   2507 13.6    0.236    0.244  101.665  103.299
 multiply_cannon_loop              2507 14.6    2.118    2.180   99.427  100.873
 ot_mini                            117 10.6    0.001    0.001   65.210   65.435
 multiply_cannon_multrec          60168 15.6   33.240   35.866   42.595   44.560
 qs_ot_get_derivative               117 11.6    0.001    0.001   40.442   40.662
 rebuild_ks_matrix                  128  8.3    0.001    0.001   34.378   34.640
 qs_ks_build_kohn_sham_matrix       128  9.3    0.026    0.097   34.377   34.639
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.945   31.196
 mp_waitall_1                    267128 16.5   27.959   30.805   27.959   30.805
 multiply_cannon_sync_h2d         60168 15.6   27.291   28.905   27.291   28.905
 qs_ot_get_p                        128 10.4    0.001    0.001   26.796   27.015
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.260   24.971
 apply_single                       128 13.6    0.001    0.001   24.259   24.971
 ot_diis_step                       117 11.6    0.007    0.008   24.424   24.425
 init_scf_loop                       11  6.9    0.000    0.001   21.178   21.179
 qs_ot_p2m_diag                      83 11.4    0.077    0.091   19.910   19.994
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   18.730   18.914
 multiply_cannon_metrocomm3       60168 15.6    0.114    0.119   15.458   17.331
 cp_dbcsr_syevd                      83 12.4    0.004    0.005   17.316   17.318
 prepare_preconditioner              11  7.9    0.000    0.000   16.015   16.065
 make_preconditioner                 11  8.9    0.000    0.000   16.015   16.065
 make_full_inverse_cholesky          11  9.9    0.000    0.000   15.257   15.439
 sum_up_and_integrate               128 10.3    0.090    0.108   14.496   14.515
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   14.463   14.469
 integrate_v_rspace                 128 11.3    0.004    0.009   14.406   14.429
 cp_fm_redistribute_end              83 14.4   11.432   14.385   11.443   14.387
 init_scf_run                        11  5.9    0.000    0.001   14.195   14.195
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   14.194   14.195
 cp_fm_diag_elpa_base                83 14.4    2.904   14.017    2.935   14.125
 qs_rho_update_rho_low              128  7.7    0.001    0.001   14.014   14.108
 calculate_rho_elec                 128  8.7    0.045    0.064   14.014   14.107
 make_m2s                          5014 13.6    0.104    0.113   13.801   14.075
 make_images                       5014 14.6    0.397    0.415   13.620   13.907
 density_rs2pw                      128  9.7    0.006    0.007    7.370   10.581
 dbcsr_mm_accdrv_process         124484 16.2    3.368    3.524    8.913    9.599
 calculate_dm_sparse                128  9.5    0.001    0.001    9.130    9.219
 cp_fm_cholesky_invert               11 10.9    9.142    9.149    9.142    9.149
 wfi_extrapolate                     11  7.9    0.001    0.001    9.130    9.130
 rs_pw_transfer                    1046 11.9    0.016    0.019    5.932    9.109
 mp_sum_l                          7870 13.0    8.181    9.083    8.181    9.083
 pw_transfer                       1547 11.6    0.075    0.098    8.222    8.463
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.012    8.017    8.263
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    7.899    8.034
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    7.825    7.933
 make_images_data                  5014 15.6    0.066    0.072    6.755    7.644
 grid_integrate_task_list           128 12.3    7.059    7.598    7.059    7.598
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    7.470    7.479
 hybrid_alltoall_any               5200 16.5    0.289    2.259    5.922    7.267
 multiply_cannon_metrocomm1       60168 15.6    0.086    0.092    5.922    7.267
 fft_wrap_pw1pw2_140                523 13.2    0.448    0.514    6.948    7.235
 fft3d_ps                          1291 14.7    2.108    2.895    6.799    6.978
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    6.229    6.402
 mp_waitany                       16020 13.9    2.731    5.899    2.731    5.899
 grid_collocate_task_list           128  9.7    4.569    5.760    4.569    5.760
 mp_alltoall_d11v                  2415 14.1    4.227    5.706    4.227    5.706
 rs_pw_transfer_RS2PW_140           139 11.5    0.279    0.296    2.146    5.325
 mp_sum_d                          4464 12.1    3.991    4.915    3.991    4.915
 calculate_first_density_matrix       1  7.0    0.000    0.001    4.894    4.901
 potential_pw2rs                    128 12.3    0.009    0.011    4.859    4.885
 cp_fm_cholesky_decompose            22 10.9    4.685    4.700    4.685    4.700
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=208.793000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=595.545455, yerr=7.177789
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420243808256       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528908111872       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514772E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.183246E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755942624       0.0%      0.0%    100.0%
 number of processed stacks               5975232       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.7
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             826.728448E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2406720
 MPI messages size (bytes):
  total size                         4.100943E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.703955E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               70860               2317615104
     32768 < size <=   131072              722992              55511613440
    131072 < size <=  4194304             1375664            1398181724160
   4194304 < size <= 16777216              154704            1463835059104
  16777216 < size                           67584            1181116006400
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4103                  56957.
 MP_Allreduce        11297                    945.
 MP_Sync               170
 MP_Alltoall          1969                4869808.
 MP_SendRecv         12032                  47072.
 MP_ISendRecv        12032                  47072.
 MP_Wait             25916
 MP_comm_split          83
 MP_ISend            11748                 212467.
 MP_IRecv            11748                 212467.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.188    0.504  200.992  200.993
 qs_mol_dyn_low                       1  2.0    0.027    0.191  199.227  199.243
 qs_forces                           11  3.9    0.020    0.131  199.122  199.147
 qs_energies                         11  4.9    0.071    0.457  192.210  192.254
 scf_env_do_scf                      11  5.9    0.015    0.118  171.308  171.318
 scf_env_do_scf_inner_loop          117  6.6    0.012    0.037  137.419  137.432
 velocity_verlet                     10  3.0    0.001    0.001  120.706  120.707
 dbcsr_multiply_generic            2507 12.6    0.243    0.249  101.428  102.712
 qs_scf_new_mos                     117  7.6    0.001    0.001   96.925   97.486
 qs_scf_loop_do_ot                  117  8.6    0.006    0.039   96.924   97.485
 ot_scf_mini                        117  9.6    0.019    0.130   92.135   92.774
 multiply_cannon                   2507 13.6    0.477    0.530   80.269   84.403
 multiply_cannon_loop              2507 14.6    1.259    1.311   77.051   80.011
 ot_mini                            117 10.6    0.001    0.001   52.499   53.090
 mp_waitall_1                    214728 16.6   26.541   41.768   26.541   41.768
 multiply_cannon_multrec          30084 15.6   21.760   26.618   32.628   38.200
 rebuild_ks_matrix                  128  8.3    0.001    0.001   34.170   34.808
 qs_ks_build_kohn_sham_matrix       128  9.3    0.019    0.039   34.170   34.808
 init_scf_loop                       11  6.9    0.009    0.075   33.750   33.751
 multiply_cannon_metrocomm3       30084 15.6    0.094    0.100   17.491   31.429
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.838   31.389
 qs_ot_get_derivative               117 11.6    0.001    0.002   30.404   31.027
 prepare_preconditioner              11  7.9    0.004    0.029   28.624   28.720
 make_preconditioner                 11  8.9    0.001    0.007   28.620   28.720
 make_full_inverse_cholesky          11  9.9    0.000    0.000   27.295   27.869
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   22.628   23.896
 apply_single                       128 13.6    0.001    0.001   22.628   23.896
 qs_ot_get_p                        128 10.4    0.001    0.001   21.489   22.207
 multiply_cannon_sync_h2d         30084 15.6   19.054   21.942   19.054   21.942
 ot_diis_step                       117 11.6    0.014    0.015   21.925   21.927
 cp_fm_cholesky_invert               11 10.9   16.668   16.681   16.668   16.681
 qs_ot_p2m_diag                      83 11.4    0.188    0.216   16.595   16.635
 make_m2s                          5014 13.6    0.090    0.096   14.607   15.959
 make_images                       5014 14.6    1.142    1.323   14.395   15.750
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   15.442   15.443
 sum_up_and_integrate               128 10.3    0.122    0.192   15.181   15.217
 integrate_v_rspace                 128 11.3    0.018    0.120   15.059   15.106
 init_scf_run                        11  5.9    0.000    0.001   14.907   14.908
 scf_env_initial_rho_setup           11  6.9    0.003    0.022   14.906   14.908
 qs_rho_update_rho_low              128  7.7    0.001    0.001   14.367   14.394
 calculate_rho_elec                 128  8.7    0.088    0.105   14.367   14.393
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   12.341   12.355
 cp_fm_redistribute_end              83 14.4    7.206   12.282    7.216   12.283
 cp_fm_diag_elpa_base                83 14.4    4.826   11.759    5.041   12.132
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   10.968   11.385
 dbcsr_mm_accdrv_process          62242 16.2    4.976    6.043   10.335   11.064
 multiply_cannon_metrocomm4       27577 15.6    0.096    0.110    3.801   11.037
 mp_irecv_dv                      69486 16.3    3.606   10.647    3.606   10.647
 make_images_data                  5014 15.6    0.063    0.071    8.826   10.470
 density_rs2pw                      128  9.7    0.006    0.007    7.711   10.090
 hybrid_alltoall_any               5200 16.5    0.340    1.497    7.595    9.832
 pw_transfer                       1547 11.6    0.086    0.105    9.356    9.452
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    8.133    9.285
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.011    9.130    9.230
 rs_pw_transfer                    1046 11.9    0.014    0.016    6.354    8.817
 wfi_extrapolate                     11  7.9    0.001    0.001    8.453    8.453
 fft_wrap_pw1pw2_140                523 13.2    0.474    0.530    8.054    8.159
 fft3d_ps                          1291 14.7    2.754    2.924    7.495    7.562
 grid_integrate_task_list           128 12.3    7.169    7.541    7.169    7.541
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    7.196    7.208
 calculate_dm_sparse                128  9.5    0.001    0.001    7.021    7.159
 cp_fm_cholesky_decompose            22 10.9    6.946    7.030    6.946    7.030
 mp_sum_l                          7870 13.0    4.754    6.867    4.754    6.867
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    6.171    6.345
 calculate_first_density_matrix       1  7.0    0.046    0.083    6.183    6.188
 grid_collocate_task_list           128  9.7    4.718    5.842    4.718    5.842
 mp_waitany                       11748 13.9    2.997    5.510    2.997    5.510
 potential_pw2rs                    128 12.3    0.015    0.018    5.411    5.463
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    5.350    5.426
 mp_allgather_i34                  2507 14.6    1.796    5.094    1.796    5.094
 rs_pw_transfer_RS2PW_140           139 11.5    0.356    0.396    2.591    5.042
 mp_alltoall_d11v                  2415 14.1    4.187    4.933    4.187    4.933
 mp_sum_d                          4473 12.2    3.362    4.813    3.362    4.813
 dbcsr_complete_redistribute        395 12.7    0.812    1.013    3.387    4.254
 qs_energies_init_hamiltonians       11  5.9    0.004    0.031    4.133    4.162
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=200.993000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=787.727273, yerr=1.863082
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410023282688       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444707676160       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796579E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.906045E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705500928       0.0%      0.0%    100.0%
 number of processed stacks               3951168       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1697.1
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank             954.458112E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1033760
 MPI messages size (bytes):
  total size                         2.695213E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.607194E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              279168              36591108096
    131072 < size <=  4194304              654272             987691483136
   4194304 < size <= 16777216               65184             925172905552
  16777216 < size                           28448             745747251200
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4085                  57194.
 MP_Allreduce        11251                    986.
 MP_Sync               168
 MP_Alltoall          1700                9383497.
 MP_SendRecv          7874                  75008.
 MP_ISendRecv         7874                  75008.
 MP_Wait             21654
 MP_comm_split          82
 MP_ISend            11660                 275234.
 MP_IRecv            11660                 275234.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.156    1.270  186.738  187.510
 qs_mol_dyn_low                       1  2.0    0.050    0.191  185.026  185.040
 qs_forces                           11  3.9    0.015    0.095  184.249  184.305
 qs_energies                         11  4.9    0.122    0.455  177.495  177.556
 scf_env_do_scf                      11  5.9    0.005    0.041  157.236  157.236
 scf_env_do_scf_inner_loop          116  6.6    0.019    0.058  120.866  120.868
 velocity_verlet                     10  3.0    0.001    0.001  113.908  113.929
 dbcsr_multiply_generic            2485 12.5    0.196    0.201   84.884   85.838
 qs_scf_new_mos                     116  7.6    0.001    0.001   82.424   82.696
 qs_scf_loop_do_ot                  116  8.6    0.002    0.004   82.424   82.696
 ot_scf_mini                        116  9.6    0.013    0.043   78.301   78.618
 multiply_cannon                   2485 13.5    0.495    0.518   64.904   69.320
 multiply_cannon_loop              2485 14.5    0.848    0.874   61.801   63.897
 ot_mini                            116 10.6    0.001    0.001   44.562   44.886
 init_scf_loop                       11  6.9    0.025    0.121   36.233   36.234
 mp_waitall_1                    169034 16.6   26.499   35.490   26.499   35.490
 rebuild_ks_matrix                  127  8.3    0.001    0.001   31.393   31.857
 qs_ks_build_kohn_sham_matrix       127  9.3    0.020    0.036   31.392   31.856
 prepare_preconditioner              11  7.9    0.003    0.024   31.608   31.665
 make_preconditioner                 11  8.9    0.010    0.045   31.604   31.665
 make_full_inverse_cholesky          11  9.9    0.000    0.000   29.231   30.662
 qs_ks_update_qs_env                127  7.6    0.001    0.001   28.316   28.734
 multiply_cannon_multrec          19880 15.5   13.397   16.505   23.820   26.915
 multiply_cannon_metrocomm3       19880 15.5    0.059    0.063   16.521   25.844
 qs_ot_get_derivative               116 11.6    0.002    0.002   24.918   25.250
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   19.793   20.755
 apply_single                       127 13.6    0.001    0.001   19.792   20.755
 qs_ot_get_p                        127 10.4    0.001    0.001   19.171   19.567
 ot_diis_step                       116 11.6    0.018    0.018   19.545   19.545
 multiply_cannon_sync_h2d         19880 15.5   14.181   15.805   14.181   15.805
 make_m2s                          4970 13.5    0.080    0.085   15.017   15.798
 make_images                       4970 14.5    1.153    1.250   14.783   15.561
 qs_ot_p2m_diag                      82 11.4    0.262    0.269   15.005   15.022
 sum_up_and_integrate               127 10.3    0.132    0.155   14.836   14.863
 cp_fm_cholesky_invert               11 10.9   14.771   14.781   14.771   14.781
 integrate_v_rspace                 127 11.3    0.010    0.058   14.704   14.729
 qs_rho_update_rho_low              127  7.7    0.001    0.001   14.554   14.591
 calculate_rho_elec                 127  8.7    0.131    0.146   14.553   14.591
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   13.985   13.987
 init_scf_run                        11  5.9    0.013    0.099   13.950   13.950
 scf_env_initial_rho_setup           11  6.9    0.004    0.022   13.937   13.950
 cp_fm_diag_elpa                     82 13.4    0.000    0.001   10.963   10.965
 cp_fm_redistribute_end              82 14.4    4.146   10.906    4.160   10.909
 cp_fm_diag_elpa_base                82 14.4    6.317   10.310    6.726   10.782
 make_images_data                  4970 15.5    0.059    0.068    9.283   10.467
 dbcsr_mm_accdrv_process          41158 16.2    5.279    6.273    9.885   10.236
 hybrid_alltoall_any               5155 16.4    0.429    1.968    8.102    9.719
 density_rs2pw                      127  9.7    0.006    0.006    7.435    9.358
 pw_transfer                       1535 11.6    0.085    0.105    9.217    9.343
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    8.995    9.127
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    8.798    9.024
 multiply_cannon_metrocomm4       17395 15.5    0.062    0.071    3.406    8.971
 mp_irecv_dv                      49801 16.2    3.283    8.725    3.283    8.725
 fft_wrap_pw1pw2_140                519 13.2    0.475    0.519    7.941    8.091
 grid_integrate_task_list           127 12.3    7.241    7.770    7.241    7.770
 cp_fm_upper_to_full                104 14.5    5.812    7.583    5.812    7.583
 cp_fm_cholesky_decompose            22 10.9    7.511    7.565    7.511    7.565
 wfi_extrapolate                     11  7.9    0.001    0.001    7.490    7.490
 fft3d_ps                          1281 14.7    2.651    2.870    7.270    7.360
 rs_pw_transfer                    1038 11.9    0.013    0.014    5.705    7.324
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    6.270    6.995
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.497    6.502
 calculate_dm_sparse                127  9.5    0.001    0.001    6.350    6.444
 dbcsr_complete_redistribute        393 12.7    1.205    1.366    4.659    6.415
 calculate_first_density_matrix       1  7.0    0.051    0.083    6.163    6.181
 grid_collocate_task_list           127  9.7    4.881    5.760    4.881    5.760
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.436    5.564
 copy_fm_to_dbcsr                   208 11.6    0.002    0.002    3.472    5.224
 mp_alltoall_d11v                  2401 14.1    4.320    5.032    4.320    5.032
 potential_pw2rs                    127 12.3    0.020    0.022    4.954    4.973
 mp_allgather_i34                  2485 14.5    1.634    4.661    1.634    4.661
 mp_sum_l                          7804 13.0    3.260    4.609    3.260    4.609
 mp_waitany                       11660 13.9    2.784    4.462    2.784    4.462
 qs_energies_init_hamiltonians       11  5.9    0.003    0.021    4.399    4.452
 mp_sum_d                          4444 12.1    2.832    4.078    2.832    4.078
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    2.343    4.039
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    3.985    4.014
 rs_pw_transfer_RS2PW_140           138 11.5    0.353    0.418    2.368    3.950
 mp_alltoall_i22                    712 14.1    1.942    3.821    1.942    3.821
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=187.510000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=903.727273, yerr=9.086362
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430456039424       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1962800054272       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986255912960       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992003932160       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613072052224       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239176077312       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239176077312       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.233020E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.387242E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806383904       0.0%      0.0%    100.0%
 number of processed stacks               6026880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1129.3
 marketing flops                   145.651870E+12
 -------------------------------------------------------------------------------
 # multiplications                           2529
 max memory usage/rank               1.138762E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1153224
 MPI messages size (bytes):
  total size                         2.039489E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.768511E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              322096              36390305792
    131072 < size <=  4194304              721976             792118951936
   4194304 < size <= 16777216               70800             669922227920
  16777216 < size                           30960             541065216000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4127                  56626.
 MP_Allreduce        11356                   1063.
 MP_Sync               172
 MP_Alltoall          1724               12509604.
 MP_SendRecv          5934                  75008.
 MP_ISendRecv         5934                  75008.
 MP_Wait             22612
 MP_comm_split          84
 MP_ISend            15064                 244788.
 MP_IRecv            15064                 244788.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.165    0.301  196.888  196.952
 qs_mol_dyn_low                       1  2.0    0.004    0.010  195.230  195.243
 qs_forces                           11  3.9    0.026    0.152  194.018  194.026
 qs_energies                         11  4.9    0.038    0.092  186.696  186.720
 scf_env_do_scf                      11  5.9    0.064    0.444  166.558  166.565
 velocity_verlet                     10  3.0    0.001    0.001  127.576  127.624
 scf_env_do_scf_inner_loop          118  6.6    0.038    0.088  119.156  119.166
 dbcsr_multiply_generic            2529 12.6    0.331    0.493   81.838   82.885
 qs_scf_new_mos                     118  7.6    0.001    0.001   80.692   80.984
 qs_scf_loop_do_ot                  118  8.6    0.010    0.025   80.691   80.983
 ot_scf_mini                        118  9.6    0.007    0.015   76.157   76.429
 multiply_cannon                   2529 13.6    0.562    0.604   56.087   59.310
 multiply_cannon_loop              2529 14.6    1.187    1.219   52.478   54.940
 init_scf_loop                       11  6.9    0.015    0.043   46.850   46.851
 ot_mini                            118 10.6    0.001    0.001   43.138   43.392
 prepare_preconditioner              11  7.9    0.001    0.003   42.413   42.439
 make_preconditioner                 11  8.9    0.015    0.040   42.413   42.439
 make_full_inverse_cholesky          11  9.9    0.000    0.000   36.003   41.047
 multiply_cannon_multrec          30348 15.6   14.443   19.227   27.256   31.493
 rebuild_ks_matrix                  129  8.3    0.001    0.001   30.794   31.058
 qs_ks_build_kohn_sham_matrix       129  9.3    0.023    0.059   30.793   31.058
 qs_ks_update_qs_env                129  7.6    0.001    0.001   27.817   28.053
 mp_waitall_1                    149172 16.7   17.918   27.326   17.918   27.326
 qs_ot_get_derivative               118 11.6    0.001    0.002   23.357   23.633
 make_m2s                          5058 13.6    0.097    0.103   21.097   22.201
 make_images                       5058 14.6    1.969    2.273   20.786   21.890
 apply_preconditioner_dbcsr         129 12.6    0.000    0.001   19.119   19.809
 apply_single                       129 13.6    0.001    0.001   19.118   19.808
 ot_diis_step                       118 11.6    0.018    0.019   19.649   19.651
 qs_ot_get_p                        129 10.4    0.001    0.001   19.101   19.367
 cp_fm_upper_to_full                106 14.7   11.082   16.339   11.082   16.339
 cp_fm_cholesky_invert               11 10.9   16.171   16.180   16.171   16.180
 multiply_cannon_metrocomm3       30348 15.6    0.047    0.049    6.432   15.869
 sum_up_and_integrate               129 10.3    0.149    0.196   15.119   15.149
 qs_rho_update_rho_low              129  7.7    0.001    0.001   14.999   15.064
 calculate_rho_elec                 129  8.7    0.176    0.192   14.998   15.063
 integrate_v_rspace                 129 11.3    0.016    0.054   14.969   15.012
 qs_ot_p2m_diag                      84 11.4    0.347    0.394   14.952   15.003
 cp_dbcsr_syevd                      84 12.4    0.005    0.005   13.661   13.662
 init_scf_run                        11  5.9    0.004    0.011   13.425   13.430
 scf_env_initial_rho_setup           11  6.9    0.026    0.074   13.421   13.430
 dbcsr_complete_redistribute        397 12.7    1.544    1.805    9.153   12.983
 make_images_data                  5058 15.6    0.064    0.068   11.330   12.909
 dbcsr_mm_accdrv_process          62780 16.2    7.759    8.691   12.381   12.856
 multiply_cannon_sync_h2d         30348 15.6   11.816   12.580   11.816   12.580
 hybrid_alltoall_any               5245 16.5    0.530    2.224    9.982   11.871
 copy_fm_to_dbcsr                   210 11.7    0.001    0.002    7.799   11.616
 cp_fm_diag_elpa                     84 13.4    0.000    0.001   10.562   10.564
 cp_fm_redistribute_end              84 14.4    1.832   10.492    1.846   10.499
 cp_fm_diag_elpa_base                84 14.4    8.030    9.862    8.616   10.354
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    6.372   10.074
 mp_alltoall_i22                    720 14.1    5.979    9.954    5.979    9.954
 pw_transfer                       1559 11.6    0.089    0.109    9.726    9.828
 qs_ot_get_derivative_diag           78 12.4    0.002    0.002    9.575    9.796
 fft_wrap_pw1pw2                   1301 12.7    0.010    0.012    9.496    9.604
 density_rs2pw                      129  9.7    0.006    0.006    7.308    8.984
 fft_wrap_pw1pw2_140                527 13.2    0.490    0.499    8.419    8.552
 grid_integrate_task_list           129 12.3    7.541    8.000    7.541    8.000
 fft3d_ps                          1301 14.7    2.796    2.889    7.706    7.782
 wfi_extrapolate                     11  7.9    0.001    0.001    7.717    7.717
 cp_fm_cholesky_decompose            22 10.9    7.602    7.699    7.602    7.699
 calculate_dm_sparse                129  9.5    0.001    0.001    6.912    7.024
 rs_pw_transfer                    1054 12.0    0.013    0.014    4.952    6.812
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.344    6.391
 multiply_cannon_metrocomm4       25290 15.6    0.077    0.085    2.695    6.343
 mp_irecv_dv                      76751 16.2    2.550    6.087    2.550    6.087
 mp_alltoall_d11v                  2429 14.1    4.977    5.862    4.977    5.862
 grid_collocate_task_list           129  9.7    5.127    5.838    5.127    5.838
 calculate_first_density_matrix       1  7.0    0.023    0.060    5.405    5.444
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.091    5.149
 qs_energies_init_hamiltonians       11  5.9    0.013    0.051    4.940    4.963
 potential_pw2rs                    129 12.3    0.023    0.023    4.798    4.833
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.526    4.610
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    4.205    4.274
 mp_waitany                       15064 13.9    2.467    4.208    2.467    4.208
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=196.952000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1076.636364, yerr=18.281997
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022950912       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963542011904       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444706349056       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019182452736       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019182452736       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796564E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.820057E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499488       0.0%      0.0%    100.0%
 number of processed stacks               1944496       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3448.5
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               1.501704E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  238560
 MPI messages size (bytes):
  total size                         1.321104E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.537828E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              112800              59139686400
   4194304 < size <= 16777216              104112             545846722560
  16777216 < size                           20064             716108490000
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8852                     52.
 MP_Alltoall          9584                 804353.
 MP_ISend            39716                2104723.
 MP_IRecv            39716                2103824.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3995                  58308.
 MP_Allreduce        10985                   1176.
 MP_Sync                86
 MP_Alltoall          1700               18828162.
 MP_SendRecv          3810                 122880.
 MP_ISendRecv         3810                 122880.
 MP_Wait             16000
 MP_ISend            10600                 423612.
 MP_IRecv            10600                 423612.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.395    0.432  177.552  177.561
 qs_mol_dyn_low                       1  2.0    0.003    0.003  176.168  176.180
 qs_forces                           11  3.9    0.004    0.005  175.925  175.932
 qs_energies                         11  4.9    0.016    0.016  168.357  168.365
 scf_env_do_scf                      11  5.9    0.001    0.001  148.688  148.694
 velocity_verlet                     10  3.0    0.001    0.001  113.114  113.126
 scf_env_do_scf_inner_loop          116  6.6    0.018    0.022  112.324  112.325
 dbcsr_multiply_generic            2485 12.5    0.258    0.319   74.548   75.035
 qs_scf_new_mos                     116  7.6    0.001    0.001   74.139   74.224
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   74.138   74.224
 ot_scf_mini                        116  9.6    0.003    0.004   69.848   69.918
 multiply_cannon                   2485 13.5    0.581    0.621   55.440   59.482
 multiply_cannon_loop              2485 14.5    0.442    0.454   50.832   51.791
 ot_mini                            116 10.6    0.001    0.001   39.567   39.649
 init_scf_loop                       11  6.9    0.000    0.000   36.220   36.222
 prepare_preconditioner              11  7.9    0.000    0.000   32.237   32.267
 make_preconditioner                 11  8.9    0.000    0.000   32.237   32.267
 mp_waitall_1                    124680 16.7   24.748   32.144   24.748   32.144
 make_full_inverse_cholesky          11  9.9    0.000    0.000   30.093   30.375
 rebuild_ks_matrix                  127  8.3    0.001    0.001   29.528   29.625
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.018   29.527   29.624
 qs_ks_update_qs_env                127  7.6    0.001    0.001   26.792   26.882
 multiply_cannon_multrec           9940 15.5   10.458   14.259   19.064   21.945
 qs_ot_get_derivative               116 11.6    0.001    0.002   19.830   19.908
 ot_diis_step                       116 11.6    0.019    0.020   19.663   19.664
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   19.298   19.612
 apply_single                       127 13.6    0.001    0.001   19.298   19.612
 multiply_cannon_metrocomm3        9940 15.5    0.023    0.025   12.324   18.772
 cp_fm_cholesky_invert               11 10.9   18.539   18.545   18.539   18.545
 make_m2s                          4970 13.5    0.065    0.070   15.509   17.938
 qs_ot_get_p                        127 10.4    0.001    0.001   17.719   17.814
 make_images                       4970 14.5    2.239    2.570   15.203   17.638
 qs_rho_update_rho_low              127  7.7    0.001    0.001   15.263   15.312
 calculate_rho_elec                 127  8.7    0.257    0.266   15.262   15.312
 sum_up_and_integrate               127 10.3    0.178    0.187   14.927   14.970
 integrate_v_rspace                 127 11.3    0.004    0.004   14.749   14.800
 qs_ot_p2m_diag                      82 11.4    0.489    0.495   14.059   14.075
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   12.945   12.946
 init_scf_run                        11  5.9    0.000    0.001   12.424   12.424
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   12.423   12.424
 multiply_cannon_sync_h2d          9940 15.5   11.559   12.017   11.559   12.017
 make_images_data                  4970 15.5    0.050    0.059    9.227   11.592
 hybrid_alltoall_any               5155 16.4    0.838    3.781    9.102   11.430
 cp_fm_diag_elpa                     82 13.4    0.000    0.000    9.910    9.922
 cp_fm_diag_elpa_base                82 14.4    9.658    9.749    9.901    9.912
 pw_transfer                       1535 11.6    0.085    0.094    9.629    9.672
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    9.408    9.457
 dbcsr_mm_accdrv_process          20590 16.1    2.968    3.968    8.238    8.959
 fft_wrap_pw1pw2_140                519 13.2    0.492    0.514    8.282    8.332
 grid_integrate_task_list           127 12.3    7.651    8.215    7.651    8.215
 cp_fm_cholesky_decompose            22 10.9    8.060    8.152    8.060    8.152
 density_rs2pw                      127  9.7    0.005    0.006    6.917    7.790
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    7.698    7.749
 fft3d_ps                          1281 14.7    2.682    2.767    7.585    7.601
 wfi_extrapolate                     11  7.9    0.001    0.001    7.272    7.272
 mp_allgather_i34                  2485 14.5    2.822    6.947    2.822    6.947
 multiply_cannon_metrocomm1        9940 15.5    0.028    0.028    3.949    6.451
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.332    6.345
 calculate_dm_sparse                127  9.5    0.001    0.001    6.269    6.342
 grid_collocate_task_list           127  9.7    5.351    6.026    5.351    6.026
 qs_energies_init_hamiltonians       11  5.9    0.002    0.003    5.742    5.743
 dbcsr_complete_redistribute        393 12.7    2.146    2.212    5.275    5.701
 mp_alltoall_d11v                  2401 14.1    4.930    5.627    4.930    5.627
 rs_pw_transfer                    1038 11.9    0.012    0.013    4.431    5.343
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.176    5.222
 calculate_first_density_matrix       1  7.0    0.031    0.031    4.939    4.940
 potential_pw2rs                    127 12.3    0.026    0.027    4.548    4.562
 multiply_cannon_metrocomm4        7455 15.5    0.023    0.026    1.836    4.015
 mp_irecv_dv                      28618 15.9    1.799    3.947    1.799    3.947
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.573    3.882
 copy_fm_to_dbcsr                   208 11.6    0.002    0.002    3.507    3.854
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.718    3.744
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    3.679    3.722
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.633    3.642
 copy_dbcsr_to_fm                   185 11.7    0.004    0.004    3.498    3.617
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    3.360    3.559
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=177.561000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1394.909091, yerr=37.342285
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.696234E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks               1964048       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3439.8
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               3.066548E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  100280
 MPI messages size (bytes):
  total size                         1.136195E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.330227E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               45208              35089547264
   4194304 < size <= 16777216               44352             379752284160
  16777216 < size                           10104             721350232272
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4002                  59142.
 MP_Allreduce        11002                   1514.
 MP_Sync                87
 MP_Alltoall          1712               36974125.
 MP_SendRecv          1792                 218624.
 MP_ISendRecv         1792                 218624.
 MP_Wait              9802
 MP_ISend             6408                1080322.
 MP_IRecv             6408                1080322.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.129    0.177  305.356  305.367
 qs_mol_dyn_low                       1  2.0    0.003    0.003  304.237  304.248
 qs_forces                           11  3.9    0.006    0.008  303.928  303.930
 qs_energies                         11  4.9    0.052    0.056  294.519  294.526
 scf_env_do_scf                      11  5.9    0.001    0.001  268.741  268.752
 velocity_verlet                     10  3.0    0.001    0.001  217.203  217.238
 scf_env_do_scf_inner_loop          117  6.6    0.023    0.028  143.415  143.416
 init_scf_loop                       11  6.9    0.000    0.000  125.074  125.075
 prepare_preconditioner              11  7.9    0.000    0.000  119.857  119.887
 make_preconditioner                 11  8.9    0.000    0.000  119.857  119.887
 make_full_inverse_cholesky          11  9.9    0.000    0.000   96.207  117.007
 qs_scf_new_mos                     117  7.6    0.001    0.001   92.301   92.454
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   92.300   92.453
 dbcsr_multiply_generic            2507 12.6    0.256    0.263   86.850   87.564
 ot_scf_mini                        117  9.6    0.004    0.004   87.442   87.537
 cp_fm_upper_to_full                105 14.8   53.240   75.769   53.240   75.769
 multiply_cannon                   2507 13.6    0.708    0.756   61.612   62.158
 multiply_cannon_loop              2507 14.6    0.471    0.481   57.709   59.459
 ot_mini                            117 10.6    0.001    0.001   46.035   46.145
 dbcsr_complete_redistribute        395 12.7    4.004    4.065   29.287   42.045
 copy_fm_to_dbcsr                   209 11.7    0.001    0.002   25.923   38.731
 rebuild_ks_matrix                  128  8.3    0.001    0.001   38.606   38.724
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.017   38.606   38.723
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000   23.605   36.352
 qs_ks_update_qs_env                128  7.6    0.001    0.001   35.529   35.633
 mp_alltoall_i22                    716 14.1   21.397   34.076   21.397   34.076
 mp_waitall_1                    103674 16.8   29.767   34.075   29.767   34.075
 cp_fm_cholesky_invert               11 10.9   33.771   33.778   33.771   33.778
 qs_ot_get_p                        128 10.4    0.001    0.001   25.767   25.854
 qs_ot_get_derivative               117 11.6    0.002    0.002   24.900   24.986
 qs_ot_p2m_diag                      83 11.4    0.878    0.883   21.677   21.707
 make_m2s                          5014 13.6    0.075    0.078   20.465   21.623
 multiply_cannon_metrocomm3       10028 15.6    0.023    0.024   19.819   21.365
 make_images                       5014 14.6    3.752    3.844   19.984   21.144
 ot_diis_step                       117 11.6    0.022    0.023   21.091   21.091
 qs_rho_update_rho_low              128  7.7    0.001    0.001   20.807   20.824
 calculate_rho_elec                 128  8.7    0.483    0.484   20.807   20.823
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   20.277   20.626
 apply_single                       128 13.6    0.001    0.001   20.277   20.626
 cp_dbcsr_syevd                      83 12.4    0.006    0.006   19.952   19.954
 multiply_cannon_multrec          10028 15.6   10.571   12.321   19.536   19.756
 sum_up_and_integrate               128 10.3    0.321    0.323   19.580   19.662
 integrate_v_rspace                 128 11.3    0.004    0.004   19.259   19.339
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   16.750   16.750
 cp_fm_diag_elpa_base                83 14.4   12.387   13.858   16.745   16.745
 multiply_cannon_sync_h2d         10028 15.6   15.662   15.682   15.662   15.682
 init_scf_run                        11  5.9    0.000    0.001   15.059   15.059
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   15.058   15.059
 hybrid_alltoall_any               5200 16.5    1.298    3.040   11.306   13.380
 make_images_data                  5014 15.6    0.058    0.063   11.132   13.158
 pw_transfer                       1547 11.6    0.093    0.094   12.413   12.420
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.011   12.178   12.185
 fft_wrap_pw1pw2_140                523 13.2    0.542    0.545   10.812   10.825
 dbcsr_mm_accdrv_process          20762 16.1    3.952    6.052    8.725   10.520
 fft3d_ps                          1291 14.7    2.740    2.746   10.215   10.226
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.505    9.574
 wfi_extrapolate                     11  7.9    0.001    0.001    9.183    9.183
 mp_alltoall_d11v                  2415 14.1    8.060    8.992    8.060    8.992
 cp_fm_cholesky_decompose            22 10.9    8.942    8.980    8.942    8.980
 grid_integrate_task_list           128 12.3    8.566    8.715    8.566    8.715
 density_rs2pw                      128  9.7    0.005    0.005    8.398    8.545
 qs_energies_init_hamiltonians       11  5.9    0.015    0.019    8.221    8.224
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    7.726    7.785
 calculate_dm_sparse                128  9.5    0.001    0.001    6.969    7.041
 grid_collocate_task_list           128  9.7    6.357    6.377    6.357    6.377
 rs_scatter_matrices                139  9.7    3.616    4.536    6.025    6.254
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    6.049    6.122
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=305.367000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2744.454545, yerr=155.430992
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.262146E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255669.
 MP_Allreduce         3059                   6274.
 MP_Sync                 4
 MP_Alltoall            54                6805335.
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.207    0.360   89.378   89.429
 qs_energies                          1  2.0    0.000    0.000   87.112   87.179
 ls_scf                               1  3.0    0.003    0.021   85.942   86.007
 dbcsr_multiply_generic             111  6.7    0.015    0.023   72.675   72.806
 multiply_cannon                    111  7.7    0.017    0.020   55.840   57.082
 multiply_cannon_loop               111  8.7    0.210    0.226   52.439   53.684
 ls_scf_main                          1  4.0    0.000    0.000   52.447   52.450
 density_matrix_trs4                  2  5.0    0.002    0.003   46.782   46.873
 ls_scf_init_scf                      1  4.0    0.013    0.046   30.409   30.411
 ls_scf_init_matrix_S                 1  5.0    0.002    0.019   27.529   27.583
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   25.275   25.292
 mp_waitall_1                     11031 10.9   22.509   25.196   22.509   25.196
 multiply_cannon_multrec           2664  9.7    8.153    8.896   15.449   17.144
 multiply_cannon_sync_h2d          2664  9.7   13.623   15.692   13.623   15.692
 make_m2s                           222  7.7    0.008    0.011   13.131   13.657
 make_images                        222  8.7    0.098    0.108   13.109   13.638
 multiply_cannon_metrocomm1        2664  9.7    0.009    0.010    9.687   12.951
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.497    8.835
 make_images_data                   222  9.7    0.004    0.005    7.654    8.291
 hybrid_alltoall_any                227 10.6    0.215    1.845    6.615    8.146
 dbcsr_mm_accdrv_process           4760 10.4    0.511    0.614    6.912    7.871
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.197    7.072    6.197    7.072
 calculate_norms                   4752  9.8    5.517    6.110    5.517    6.110
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.037    5.229
 mp_sum_l                           807  5.4    3.617    5.054    3.617    5.054
 multiply_cannon_metrocomm4        2442  9.7    0.012    0.015    2.054    3.625
 mp_irecv_dv                       6231 10.9    2.036    3.603    2.036    3.603
 make_images_sizes                  222  9.7    0.000    0.000    0.781    3.594
 mp_alltoall_i44                    222 10.7    0.781    3.593    0.781    3.593
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.369    3.436
 arnoldi_extremal                     4  6.8    0.000    0.000    3.260    3.280
 arnoldi_normal_ev                    4  7.8    0.008    0.048    3.260    3.280
 ls_scf_post                          1  4.0    0.007    0.057    3.083    3.149
 build_subspace                      16  8.4    0.009    0.012    3.124    3.126
 ls_scf_initial_guess                 1  5.0    0.000    0.000    2.866    2.940
 ls_scf_qs_atomic_guess               1  6.0    0.010    0.108    2.866    2.940
 ls_scf_store_result                  1  5.0    0.000    0.000    2.851    2.885
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.340    2.825
 dbcsr_merge_single_wm              555 10.7    0.453    0.585    2.333    2.816
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.815    2.816
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.806    2.807
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    2.806    2.807
 make_images_pack                   222  9.7    2.210    2.626    2.212    2.628
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.013    2.319    2.567
 dbcsr_sort_data                    658 11.4    2.138    2.554    2.138    2.554
 ls_scf_dm_to_ks                      2  5.0    0.016    0.160    2.404    2.478
 dbcsr_matrix_vector_mult_local     304 10.0    2.067    2.467    2.069    2.469
 mp_sum_d                          1057  6.0    1.903    2.380    1.903    2.380
 buffer_matrices_ensure_size        222  8.7    1.751    2.083    1.751    2.083
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=89.429000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1142.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.111570E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  10339.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.446    1.210   96.325   96.708
 qs_energies                          1  2.0    0.000    0.000   94.011   94.029
 ls_scf                               1  3.0    0.005    0.015   91.986   92.003
 dbcsr_multiply_generic             111  6.7    0.015    0.016   75.792   76.101
 multiply_cannon                    111  7.7    0.027    0.043   53.402   57.971
 ls_scf_main                          1  4.0    0.000    0.000   55.196   55.202
 multiply_cannon_loop               111  8.7    0.116    0.123   50.093   53.822
 density_matrix_trs4                  2  5.0    0.002    0.003   49.462   49.630
 ls_scf_init_scf                      1  4.0    0.018    0.027   32.773   32.794
 ls_scf_init_matrix_S                 1  5.0    0.001    0.004   30.343   30.439
 mp_waitall_1                      9105 10.9   21.401   30.319   21.401   30.319
 matrix_sqrt_Newton_Schulz            2  6.5    0.004    0.028   27.768   27.783
 multiply_cannon_multrec           1332  9.7   13.102   16.333   22.298   26.717
 multiply_cannon_metrocomm3        1332  9.7    0.006    0.008   11.922   21.087
 make_m2s                           222  7.7    0.006    0.007   15.282   15.965
 make_images                        222  8.7    1.573    1.929   15.252   15.936
 dbcsr_mm_accdrv_process           4041 10.4    0.296    0.501    8.796   10.382
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.369    9.928    8.369    9.928
 make_images_data                   222  9.7    0.004    0.004    8.833    9.769
 hybrid_alltoall_any                227 10.6    0.520    2.455    8.362    9.406
 mp_sum_l                           807  5.4    6.053    9.295    6.053    9.295
 multiply_cannon_metrocomm4        1221  9.7    0.007    0.008    3.222    7.724
 mp_irecv_dv                       3311 11.0    3.203    7.663    3.203    7.663
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.576    7.159
 calculate_norms                   2376  9.8    6.011    6.720    6.011    6.720
 multiply_cannon_sync_h2d          1332  9.7    4.788    6.068    4.788    6.068
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.026    5.197
 arnoldi_extremal                     4  6.8    0.001    0.004    5.001    5.028
 arnoldi_normal_ev                    4  7.8    0.016    0.106    5.000    5.027
 build_subspace                      16  8.4    0.014    0.023    4.508    4.523
 ls_scf_post                          1  4.0    0.013    0.056    4.011    4.030
 dbcsr_matrix_vector_mult           304  9.0    0.009    0.021    3.134    3.365
 ls_scf_store_result                  1  5.0    0.000    0.000    3.228    3.326
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.277    3.304
 dbcsr_matrix_vector_mult_local     304 10.0    2.726    3.210    2.728    3.212
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.971    2.973
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.958    2.960
 qs_ks_build_kohn_sham_matrix         3  8.3    0.161    0.436    2.958    2.960
 ls_scf_dm_to_ks                      2  5.0    0.001    0.015    2.625    2.713
 ls_scf_initial_guess                 1  5.0    0.000    0.000    2.412    2.507
 ls_scf_qs_atomic_guess               1  6.0    0.012    0.102    2.412    2.507
 mp_allgather_i34                   111  8.7    1.010    2.466    1.010    2.466
 make_images_pack                   222  9.7    2.034    2.381    2.036    2.384
 mp_sum_d                          1055  6.0    1.382    2.119    1.382    2.119
 dbcsr_sort_data                    436 11.2    1.822    2.045    1.822    2.045
 qs_energies_init_hamiltonians        1  3.0    0.011    0.028    2.019    2.040
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=96.708000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1733.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.684813E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265470.
 MP_Allreduce         3058                  11181.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.244    0.803  101.751  101.753
 qs_energies                          1  2.0    0.000    0.000   98.653   98.696
 ls_scf                               1  3.0    0.003    0.021   95.969   96.003
 dbcsr_multiply_generic             111  6.7    0.015    0.016   77.495   77.794
 multiply_cannon                    111  7.7    0.044    0.094   53.731   58.303
 ls_scf_main                          1  4.0    0.000    0.000   57.340   57.346
 multiply_cannon_loop               111  8.7    0.100    0.106   50.147   53.756
 density_matrix_trs4                  2  5.0    0.002    0.003   51.316   51.505
 ls_scf_init_scf                      1  4.0    0.012    0.062   34.315   34.347
 mp_waitall_1                      7281 11.0   24.415   34.327   24.415   34.327
 ls_scf_init_matrix_S                 1  5.0    0.007    0.058   31.362   31.487
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.723   27.734
 multiply_cannon_multrec            888  9.7   12.588   15.126   21.498   24.904
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   11.485   23.424
 make_m2s                           222  7.7    0.006    0.007   17.165   18.339
 make_images                        222  8.7    1.968    2.291   17.127   18.301
 hybrid_alltoall_any                227 10.6    0.619    2.868    9.622   10.980
 make_images_data                   222  9.7    0.003    0.004    9.823   10.867
 dbcsr_mm_accdrv_process           3754 10.4    0.406    1.031    8.439    9.730
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.657    8.834    7.657    8.834
 mp_sum_l                           807  5.4    5.169    8.470    5.169    8.470
 multiply_cannon_sync_h2d           888  9.7    6.014    7.252    6.014    7.252
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.471    7.219
 mp_irecv_dv                       2335 11.1    2.456    7.171    2.456    7.171
 multiply_cannon_metrocomm1         888  9.7    0.002    0.003    3.729    6.822
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.860    6.645
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.610    5.810
 arnoldi_extremal                     4  6.8    0.000    0.000    5.096    5.117
 arnoldi_normal_ev                    4  7.8    0.004    0.017    5.096    5.117
 build_subspace                      16  8.4    0.014    0.020    4.787    4.794
 calculate_norms                   1584  9.8    4.322    4.668    4.322    4.668
 ls_scf_post                          1  4.0    0.018    0.178    4.311    4.364
 mp_allgather_i34                   111  8.7    1.413    3.916    1.413    3.916
 dbcsr_matrix_vector_mult           304  9.0    0.009    0.021    3.426    3.761
 dbcsr_matrix_vector_mult_local     304 10.0    3.002    3.573    3.004    3.575
 ls_scf_store_result                  1  5.0    0.000    0.000    3.338    3.430
 ls_scf_initial_guess                 1  5.0    0.000    0.000    2.933    3.029
 ls_scf_qs_atomic_guess               1  6.0    0.216    0.407    2.933    3.029
 ls_scf_dm_to_ks                      2  5.0    0.001    0.011    2.884    2.982
 qs_energies_init_hamiltonians        1  3.0    0.143    0.258    2.675    2.877
 mp_sum_d                          1055  6.0    1.912    2.756    1.912    2.756
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.607    2.608
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.588    2.590
 qs_ks_build_kohn_sham_matrix         3  8.3    0.006    0.048    2.588    2.590
 rs_pw_transfer                      27 10.4    0.000    0.000    1.770    2.272
 dbcsr_sort_data                    325 11.1    1.899    2.159    1.899    2.159
 make_images_pack                   222  9.7    1.825    2.127    1.828    2.129
 mp_waitany                         462 12.2    1.616    2.107    1.616    2.107
 rs_pw_transfer_RS2PW_150             5  9.2    0.038    0.045    1.583    2.085
 make_images_sizes                  222  9.7    0.000    0.000    1.012    2.053
 mp_alltoall_i44                    222 10.7    1.011    2.053    1.011    2.053
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=101.753000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2166.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.318796E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  13371.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.205    0.655  102.320  102.326
 qs_energies                          1  2.0    0.000    0.000   99.325   99.331
 ls_scf                               1  3.0    0.000    0.002   96.858   96.888
 dbcsr_multiply_generic             111  6.7    0.016    0.018   79.581   79.844
 ls_scf_main                          1  4.0    0.000    0.000   58.800   58.801
 multiply_cannon                    111  7.7    0.051    0.095   52.580   57.629
 density_matrix_trs4                  2  5.0    0.002    0.003   52.629   52.746
 multiply_cannon_loop               111  8.7    0.114    0.126   47.469   51.039
 ls_scf_init_scf                      1  4.0    0.025    0.100   34.810   34.835
 ls_scf_init_matrix_S                 1  5.0    0.006    0.047   33.228   33.341
 mp_waitall_1                      6369 11.0   23.367   30.379   23.367   30.379
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   29.599   29.612
 multiply_cannon_multrec           1332  9.7   14.087   17.689   22.337   25.183
 make_m2s                           222  7.7    0.006    0.008   21.080   22.544
 make_images                        222  8.7    3.139    3.611   21.030   22.496
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.003    9.798   17.961
 make_images_data                   222  9.7    0.004    0.004   11.742   13.383
 hybrid_alltoall_any                227 10.6    0.797    3.766   11.213   13.068
 dbcsr_mm_accdrv_process           3641 10.4    0.318    0.880    7.882    9.549
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.083    8.531    7.083    8.531
 mp_sum_l                           807  5.4    4.397    7.651    4.397    7.651
 multiply_cannon_sync_h2d          1332  9.7    5.466    6.126    5.466    6.126
 multiply_cannon_metrocomm4        1110  9.7    0.004    0.006    2.079    6.031
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.350    5.961
 mp_irecv_dv                       3229 10.9    2.058    5.959    2.058    5.959
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.214    5.390
 arnoldi_extremal                     4  6.8    0.000    0.000    5.203    5.217
 arnoldi_normal_ev                    4  7.8    0.002    0.006    5.203    5.217
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.657    5.215
 build_subspace                      16  8.4    0.014    0.021    4.863    4.868
 mp_allgather_i34                   111  8.7    2.228    4.670    2.228    4.670
 calculate_norms                   2376  9.8    4.191    4.544    4.191    4.544
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.020    3.569    3.845
 dbcsr_matrix_vector_mult_local     304 10.0    3.169    3.663    3.172    3.664
 dbcsr_sort_data                    658 11.4    3.106    3.486    3.106    3.486
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.847    3.258
 ls_scf_post                          1  4.0    0.000    0.002    3.248    3.253
 dbcsr_merge_single_wm              555 10.7    0.538    0.673    2.839    3.250
 ls_scf_dm_to_ks                      2  5.0    0.000    0.001    3.076    3.121
 ls_scf_store_result                  1  5.0    0.000    0.000    2.983    3.047
 qs_energies_init_hamiltonians        1  3.0    0.065    0.132    2.457    2.613
 dbcsr_data_release               10477 10.7    1.581    2.388    1.581    2.388
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.215    2.216
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.191    2.193
 qs_ks_build_kohn_sham_matrix         3  8.3    0.010    0.079    2.191    2.193
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=102.326000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2725.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.635681E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265558.
 MP_Allreduce         3049                  15663.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.095    0.156   98.990   98.991
 qs_energies                          1  2.0    0.000    0.000   97.798   97.805
 ls_scf                               1  3.0    0.000    0.000   95.447   95.457
 dbcsr_multiply_generic             111  6.7    0.017    0.018   76.458   76.631
 ls_scf_main                          1  4.0    0.000    0.000   61.023   61.024
 multiply_cannon                    111  7.7    0.113    0.179   54.414   59.935
 density_matrix_trs4                  2  5.0    0.002    0.003   53.796   53.879
 multiply_cannon_loop               111  8.7    0.070    0.083   49.774   51.573
 ls_scf_init_scf                      1  4.0    0.011    0.017   30.823   30.829
 mp_waitall_1                      5436 11.0   25.073   30.519   25.073   30.519
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.302   29.342
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.127   27.142
 multiply_cannon_multrec            444  9.7   13.969   16.197   21.024   24.205
 make_m2s                           222  7.7    0.004    0.005   17.252   19.825
 make_images                        222  8.7    3.714    4.419   17.190   19.765
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   10.348   15.568
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001    6.016   14.106
 make_images_data                   222  9.7    0.003    0.004    9.514   12.026
 hybrid_alltoall_any                227 10.6    0.791    3.781    9.266   11.753
 multiply_cannon_sync_h2d           444  9.7    6.569    8.769    6.569    8.769
 dbcsr_mm_accdrv_process           3003 10.4    0.200    0.546    6.754    7.834
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.393    7.487    6.393    7.487
 mp_allgather_i34                   111  8.7    2.812    6.960    2.812    6.960
 arnoldi_extremal                     4  6.8    0.000    0.000    5.831    5.867
 arnoldi_normal_ev                    4  7.8    0.002    0.005    5.831    5.867
 build_subspace                      16  8.4    0.015    0.020    5.434    5.442
 mp_sum_l                           807  5.4    2.918    5.197    2.918    5.197
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.526    4.689
 dbcsr_matrix_vector_mult           304  9.0    0.011    0.020    4.142    4.335
 dbcsr_matrix_vector_mult_local     304 10.0    3.621    4.094    3.623    4.096
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.626    3.954
 mp_irecv_dv                       1241 11.2    1.606    3.933    1.606    3.933
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    1.933    3.854
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.669    3.765
 calculate_norms                    792  9.8    3.542    3.701    3.542    3.701
 ls_scf_post                          1  4.0    0.000    0.000    3.600    3.607
 ls_scf_store_result                  1  5.0    0.000    0.000    3.354    3.385
 make_images_sizes                  222  9.7    0.000    0.000    1.100    3.150
 mp_alltoall_i44                    222 10.7    1.100    3.149    1.100    3.149
 qs_energies_init_hamiltonians        1  3.0    0.002    0.009    2.336    2.336
 dbcsr_finalize                     304  7.8    0.062    0.078    2.188    2.258
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.245    2.246
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.210    2.211
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    2.210    2.211
 dbcsr_merge_all                    275  8.9    0.471    0.526    2.040    2.092
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=98.991000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3687.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/c17f64ee903975497a1c6c096a904aa8b2310d59_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.713974E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284111.
 MP_Allreduce         3043                  21950.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.127    0.292  110.640  110.640
 qs_energies                          1  2.0    0.000    0.000  108.835  108.846
 ls_scf                               1  3.0    0.000    0.000  105.845  105.856
 dbcsr_multiply_generic             111  6.7    0.024    0.026   78.470   78.583
 ls_scf_main                          1  4.0    0.000    0.000   65.479   65.480
 density_matrix_trs4                  2  5.0    0.002    0.003   56.201   56.264
 multiply_cannon                    111  7.7    0.128    0.247   51.092   53.110
 multiply_cannon_loop               111  8.7    0.067    0.069   47.618   48.520
 ls_scf_init_scf                      1  4.0    0.021    0.021   36.365   36.366
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   34.413   34.429
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   31.580   31.596
 mp_waitall_1                      4527 11.1   22.311   25.480   22.311   25.480
 make_m2s                           222  7.7    0.005    0.005   23.909   24.896
 make_images                        222  8.7    4.580    4.972   23.803   24.789
 multiply_cannon_multrec            444  9.7   17.879   18.520   22.641   23.382
 hybrid_alltoall_any                227 10.6    1.661    3.629   12.900   15.940
 make_images_data                   222  9.7    0.003    0.003   13.138   15.611
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.403   12.253
 multiply_cannon_sync_h2d           444  9.7    8.847    8.874    8.847    8.874
 arnoldi_extremal                     4  6.8    0.000    0.000    7.294    7.370
 arnoldi_normal_ev                    4  7.8    0.024    0.073    7.294    7.370
 build_subspace                      16  8.4    0.026    0.037    6.704    6.719
 dbcsr_mm_accdrv_process           1814 10.4    0.239    0.343    4.597    5.887
 dbcsr_matrix_vector_mult           304  9.0    0.016    0.032    5.328    5.471
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    5.242    5.336
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.018    5.283
 dbcsr_matrix_vector_mult_local     304 10.0    4.879    5.166    4.882    5.168
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.111    4.240    4.111    4.240
 ls_scf_post                          1  4.0    0.000    0.000    4.001    4.011
 make_images_sizes                  222  9.7    0.000    0.000    1.439    3.542
 mp_alltoall_i44                    222 10.7    1.439    3.541    1.439    3.541
 mp_allgather_i34                   111  8.7    1.101    3.459    1.101    3.459
 ls_scf_store_result                  1  5.0    0.000    0.000    3.400    3.408
 calculate_norms                    792  9.8    3.243    3.285    3.243    3.285
 dbcsr_finalize                     304  7.8    0.082    0.089    3.082    3.118
 qs_energies_init_hamiltonians        1  3.0    0.009    0.016    2.959    2.959
 dbcsr_merge_all                    275  8.9    0.893    0.916    2.869    2.899
 dbcsr_complete_redistribute          5  7.6    1.435    1.472    2.764    2.873
 dbcsr_data_release               12724 10.6    2.341    2.841    2.341    2.841
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.627    2.629
 compute_matrix_preconditioner        1  6.0    0.002    0.002    2.585    2.593
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.561    2.563
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    2.561    2.563
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.407    2.541
 dbcsr_sort_data                    325 11.1    2.441    2.503    2.441    2.503
 dbcsr_new_transposed                 4  7.5    0.332    0.428    2.475    2.490
 dbcsr_frobenius_norm                74  6.6    2.056    2.138    2.193    2.234
 dbcsr_redistribute                   4  8.5    1.370    1.424    2.114    2.218
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=110.640000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=6803.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: c17f64ee903975497a1c6c096a904aa8b2310d59
Summary: empty
Status: OK