=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: 8ce3b69bbf0a9b31bf3df725b357dfb78f348a44


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1, Cray-FFTW 3.3.8.10,
#              COSMA 2.6.6, ELPA 2023.05.001, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.2.2, LIBVORI 220621, LIBXSMM 1.17, PLUMED 2.8.2,
#              SIRIUS 7.4.3, SPGLIB 1.16.2
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed by default.
#        Replace or adapt the "module add" commands below if needed.
#
# Last update: 21.06.2023
#
# Bash/make polyglot section.
#  * bash: the next line is an ordinary one-line comment, so the commands
#    that follow it are executed when this file is sourced. They
#      - stop with exit code 1 if the file is executed instead of sourced,
#      - take an optional first argument as the gcc module version
#        (default 9.3.0),
#      - load the modules, print the expected module set and save it as the
#        module collection cp2k_gpu_gnu_psmp,
#      - run the toolchain installer in tools/toolchain and then source
#        tools/toolchain/install/setup,
#      - print the make commands for this arch file and finally "return",
#        so bash does not go on to read the make variables further down.
#  * make: every line of the section ends with a backslash, which continues
#    the comment started on the next line up to and including "return".
#    Keep that trailing backslash on every line when editing.
# NOTE(review): ${maxtasks} is not assigned in this file; presumably it is
# expected to come from the calling environment -- confirm.
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   cd tools/toolchain; \
   ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j${maxtasks} --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed; \
   cd ../..; \
   printf "Sourcing ${PWD}/tools/toolchain/install/setup ... "; \
   source ${PWD}/tools/toolchain/install/setup; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   echo; \
   return

# Build switches. DO_CHECKS=yes adds -D__CHECK_DIAG and USE_ACC=yes adds the
# CUDA offload defines in the conditional sections of this file.
USE_ACC        := yes
DO_CHECKS      := no

# GPU offload settings
OFFLOAD_TARGET := cuda
GPUVER         := P100

# MAX_CONTR is passed on as -D__MAX_CONTR; LMAX is part of the LIBINT
# installation directory name.
MAX_CONTR      := 4
LMAX           := 5

# Versions of the optional packages. Each USE_* variable is tested for a
# non-empty value in its own section of this file, so an empty or
# commented-out entry leaves that package out of the build.
USE_SPGLIB     := 1.16.2
USE_SIRIUS     := 7.4.3
#USE_QUIP       := 0.9.10
USE_PLUMED     := 2.8.2
USE_LIBXSMM    := 1.17
USE_LIBXC      := 6.2.2
USE_LIBVORI    := 220621
USE_LIBPEXSI   := 1.2.0
USE_LIBINT     := 2.6.0
USE_ELPA       := 2023.05.001
USE_COSMA      := 2.6.6

# Versions of packages that are only needed for LIBPEXSI
SUPERLU_VER    := 6.1.0
SCOTCH_VER     := 6.0.0

# Versions of packages that are only needed for SIRIUS
HDF5_VER       := 1.12.0
SPLA_VER       := 1.5.5
SPFFT_VER      := 1.0.6
LIBVDWXC_VER   := 0.4.0

# Compiler, linker and archiver commands (Cray wrappers cc/CC/ftn, nvcc for
# the offload code).
FC             := ftn
LD             := ftn
CC             := cc
CXX            := CC
OFFLOAD_CC     := nvcc
AR             := ar -r

# Base optimisation flags. No -march flag is set here because cc, CC and ftn
# already include the proper one.
CFLAGS         := -O2
CFLAGS         += -fopenmp -fopenmp-simd
CFLAGS         += -ftree-vectorize -funroll-loops
CFLAGS         += -g

# Preprocessor defines that are always set; the package sections of this file
# append their own defines.
DFLAGS         := -D__parallel -D__SCALAPACK -D__FFTW3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# Root of the toolchain installation, taken relative to $(PWD).
# NOTE(review): this relies on PWD as inherited from the environment,
# presumably the CP2K top-level directory -- confirm before changing it.
INSTALL_PATH   := $(PWD)/tools/toolchain/install

# Extra define that is only set when DO_CHECKS is "yes"
ifeq ($(DO_CHECKS),yes)
   DFLAGS         += -D__CHECK_DIAG
endif

# Defines for the accelerated (CUDA) build. -D__NO_OFFLOAD_PW is set as well
# because there is possibly no performance gain with PW_CUDA currently.
ifeq ($(USE_ACC),yes)
   DFLAGS         += -D__DBCSR_ACC -D__OFFLOAD_CUDA -D__NO_OFFLOAD_PW
endif

# PLUMED 2, linked as a static library. Enabling it also sets USE_GSL to 2.7,
# which switches on the GSL section of this file.
ifneq (,$(USE_PLUMED))
   USE_PLUMED     := $(strip $(USE_PLUMED))
   USE_GSL        := 2.7
   DFLAGS         += -D__PLUMED2
   PLUMED_LIB     := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   LIBS           += $(PLUMED_LIB)/libplumed.a
endif

# ELPA. TARGET names the sub-directory of the ELPA installation that is used;
# it is fixed to "nvidia" here, which also adds -D__ELPA_NVIDIA_GPU.
ifneq (,$(USE_ELPA))
   USE_ELPA       := $(strip $(USE_ELPA))
   TARGET         := nvidia
   DFLAGS         += -D__ELPA
   ifeq ($(TARGET),nvidia)
      DFLAGS         += -D__ELPA_NVIDIA_GPU
   endif
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   CFLAGS         += -I$(ELPA_INC)/elpa
   CFLAGS         += -I$(ELPA_INC)/modules
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   LIBS           += $(ELPA_LIB)/libelpa.a
endif

# QUIP. Only active when USE_QUIP is set to a version; the archives are
# appended to LIBS in the order listed.
ifneq (,$(USE_QUIP))
   USE_QUIP       := $(strip $(USE_QUIP))
   DFLAGS         += -D__QUIP
   QUIP_INC       := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   CFLAGS         += -I$(QUIP_INC)
   QUIP_LIB       := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   LIBS           += $(addprefix $(QUIP_LIB)/,libquip_core.a libatoms.a libFoX_sax.a libFoX_common.a libFoX_utils.a libFoX_fsys.a)
endif

# LIBPEXSI together with the SuperLU_DIST and SCOTCH installations selected
# by SUPERLU_VER and SCOTCH_VER.
ifneq (,$(USE_LIBPEXSI))
   USE_LIBPEXSI   := $(strip $(USE_LIBPEXSI))
   SCOTCH_VER     := $(strip $(SCOTCH_VER))
   SUPERLU_VER    := $(strip $(SUPERLU_VER))
   DFLAGS         += -D__LIBPEXSI

   LIBPEXSI_INC   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   LIBPEXSI_LIB   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
   SUPERLU_INC    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
   SUPERLU_LIB    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib
   SCOTCH_INC     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   SCOTCH_LIB     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib

   CFLAGS         += -I$(LIBPEXSI_INC)
   CFLAGS         += -I$(SCOTCH_INC)
   CFLAGS         += -I$(SUPERLU_INC)

# Link order: PEXSI first, then SuperLU_DIST, then the SCOTCH archives
   LIBS           += $(LIBPEXSI_LIB)/libpexsi.a $(SUPERLU_LIB)/libsuperlu_dist.a
   LIBS           += $(addprefix $(SCOTCH_LIB)/,libptscotchparmetis.a libptscotch.a libptscotcherr.a libscotchmetis.a libscotch.a)
endif

# LIBVORI: a define plus one static library, no include path is added
ifneq (,$(USE_LIBVORI))
   USE_LIBVORI    := $(strip $(USE_LIBVORI))
   DFLAGS         += -D__LIBVORI
   LIBVORI_LIB    := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   LIBS           += $(LIBVORI_LIB)/libvori.a
endif

# LIBXC: libxcf03.a is put on the link line before libxc.a
ifneq (,$(USE_LIBXC))
   USE_LIBXC      := $(strip $(USE_LIBXC))
   DFLAGS         += -D__LIBXC
   LIBXC_INC      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   CFLAGS         += -I$(LIBXC_INC)
   LIBXC_LIB      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   LIBS           += $(addprefix $(LIBXC_LIB)/,libxcf03.a libxc.a)
endif

# LIBINT: the installation directory name contains both the LIBINT version
# and the LMAX value.
ifneq (,$(USE_LIBINT))
   USE_LIBINT     := $(strip $(USE_LIBINT))
   LMAX           := $(strip $(LMAX))
   DFLAGS         += -D__LIBINT
   LIBINT_INC     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   CFLAGS         += -I$(LIBINT_INC)
   LIBINT_LIB     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   LIBS           += $(LIBINT_LIB)/libint2.a
endif

# SPGLIB: include path, define and the static library libsymspg.a
ifneq (,$(USE_SPGLIB))
   USE_SPGLIB     := $(strip $(USE_SPGLIB))
   DFLAGS         += -D__SPGLIB
   SPGLIB_INC     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   CFLAGS         += -I$(SPGLIB_INC)
   SPGLIB_LIB     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   LIBS           += $(SPGLIB_LIB)/libsymspg.a
endif

# LIBXSMM: libxsmmf.a is put on the link line before libxsmm.a
ifneq (,$(USE_LIBXSMM))
   USE_LIBXSMM    := $(strip $(USE_LIBXSMM))
   DFLAGS         += -D__LIBXSMM
   LIBXSMM_INC    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   CFLAGS         += -I$(LIBXSMM_INC)
   LIBXSMM_LIB    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   LIBS           += $(addprefix $(LIBXSMM_LIB)/,libxsmmf.a libxsmm.a)
endif

# SIRIUS and the packages configured with it (HDF5, libvdwxc, SpFFT, SpLA).
# With USE_ACC=yes the "cuda" sub-directories of SpFFT, SpLA and SIRIUS are
# used and -D__OFFLOAD_GEMM is set in addition.
ifneq (,$(USE_SIRIUS))
# Normalise all version strings first
   USE_SIRIUS     := $(strip $(USE_SIRIUS))
   HDF5_VER       := $(strip $(HDF5_VER))
   LIBVDWXC_VER   := $(strip $(LIBVDWXC_VER))
   SPFFT_VER      := $(strip $(SPFFT_VER))
   SPLA_VER       := $(strip $(SPLA_VER))

# Paths that are the same for the CPU and the GPU build
   HDF5_LIB       := $(INSTALL_PATH)/hdf5-$(HDF5_VER)/lib
   LIBVDWXC_INC   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SPFFT_INC      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
   SPLA_INC       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla

# Paths that depend on the build flavour
   ifneq ($(USE_ACC),yes)
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
   else
      DFLAGS         += -D__OFFLOAD_GEMM
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/cuda
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib/cuda
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib/cuda
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib/cuda
   endif

   CFLAGS         += -I$(LIBVDWXC_INC) -I$(SPFFT_INC) -I$(SPLA_INC) -I$(SIRIUS_INC)
   DFLAGS         += -D__HDF5 -D__LIBVDWXC -D__SPFFT -D__SPLA -D__SIRIUS

# Link order: SIRIUS first, followed by the libraries configured with it
   LIBS           += $(SIRIUS_LIB)/libsirius.a \
                     $(SPLA_LIB)/libspla.a \
                     $(SPFFT_LIB)/libspfft.a \
                     $(LIBVDWXC_LIB)/libvdwxc.a \
                     $(HDF5_LIB)/libhdf5.a
endif

# COSMA. With USE_ACC=yes the suffix "-cuda" is appended to the version, so
# the COSMA-<version>-cuda installation directory is used.
ifneq (,$(USE_COSMA))
   USE_COSMA      := $(strip $(USE_COSMA))
   ifeq ($(USE_ACC),yes)
      USE_COSMA      := $(USE_COSMA)-cuda
   endif
   DFLAGS         += -D__COSMA
   COSMA_INC      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   CFLAGS         += -I$(COSMA_INC)
   COSMA_LIB      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   LIBS           += $(addprefix $(COSMA_LIB)/,libcosma_prefixed_pxgemm.a libcosma.a libcosta.a libTiled-MM.a)
endif

# GSL. USE_GSL is not part of the option list at the top of this file; it is
# set by the PLUMED section when PLUMED is enabled.
ifneq (,$(USE_GSL))
   USE_GSL        := $(strip $(USE_GSL))
   DFLAGS         += -D__GSL
   GSL_INC        := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   CFLAGS         += -I$(GSL_INC)
   GSL_LIB        := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   LIBS           += $(GSL_LIB)/libgsl.a
endif

# Flags for the offload (device) code: all defines plus fixed options.
# -arch sm_60 is hard-coded here and is not derived from GPUVER.
OFFLOAD_FLAGS  := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# The collected defines are appended to the C flags; the C++ flags are the
# complete C flags plus the language standard.
CFLAGS         += $(DFLAGS)
CXXFLAGS       := $(CFLAGS) -std=c++11

# Fortran flags start from the complete C flag set (optimisation flags,
# include paths and defines).
FCFLAGS        := $(CFLAGS)
# -fallow-argument-mismatch is added only when the major version printed by
# "gcc -dumpversion" is greater than 9. The check runs in a single shell call;
# the version is quoted and stderr is discarded, so a missing gcc or an empty
# version string quietly leaves the flag off instead of printing shell errors
# while this file is parsed.
ifeq ($(shell [ "$$(gcc -dumpversion 2>/dev/null | cut -d. -f1)" -gt 9 ] 2>/dev/null && echo yes), yes)
   FCFLAGS        += -fallow-argument-mismatch
endif
# Fortran-only options: backtrace on errors, free-form source without a line
# length limit, keep the frame pointer, Fortran 2008 standard.
FCFLAGS        += -fbacktrace
FCFLAGS        += -ffree-form
FCFLAGS        += -ffree-line-length-none
FCFLAGS        += -fno-omit-frame-pointer
FCFLAGS        += -std=f2008

# The linker gets the Fortran flags. When CUDA_HOME is set, its lib64
# directory is added both as a library search path and as an rpath.
LDFLAGS        := $(FCFLAGS)
ifneq (,$(CUDA_HOME))
   CUDA_LIB       := $(CUDA_HOME)/lib64
   LDFLAGS        += -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA libraries followed by the system libraries, appended after all
# package libraries.
LIBS           += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/01
 job id: 48362366
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/02
 job id: 48362367
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/03
 job id: 48362368
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/04
 job id: 48362369
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/05
 job id: 48362370
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/06
 job id: 48362371
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/07
 job id: 48362372
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/08
 job id: 48362374
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/09
 job id: 48362375
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/10
 job id: 48362377
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/11
 job id: 48362380
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/12
 job id: 48362381
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/13
 job id: 48362382
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/14
 job id: 48362383
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/15
 job id: 48362384
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/16
 job id: 48362386
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/17
 job id: 48362387
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/18
 job id: 48362388
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/19
 job id: 48362389
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/20
 job id: 48362390
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/21
 job id: 48362391
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/22
 job id: 48362392
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/23
 job id: 48362395
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/24
 job id: 48362396
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/25
 job id: 48362397
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/26
 job id: 48362398
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: 512 H2O (4 NVE MD steps on 64 nodes)
 input file: benchmarks/QS/00512_H2O/H2O-512_md.inp
 required files: []
 output file: result.log
 # nodes = 64
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/27
 job id: 48362399
 --- Point ---
 name: 601
 plot: h2o_512_md
 regex: CP2K  
 label: (64n/12r/1t)
 --- Point ---
 name: 602
 plot: h2o_512_md_mem
 regex: Estimated peak process memory 
 label: (64n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md_mem", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          424                      8.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.022    0.034  134.450  134.450
 farming_run                          1  2.0  133.987  133.989  134.422  134.424
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.497919E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              230                1134128.
 MP_Allreduce          571                1938539.
 MP_Sync                25
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split          10
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.006    0.026  115.473  115.473
 qs_energies                          1  2.0    0.000    0.000  115.273  115.276
 mp2_main                             1  3.0    0.000    0.000  113.155  113.159
 mp2_gpw_main                         1  4.0    0.043    0.066  112.165  112.168
 mp2_ri_gpw_compute_in                1  5.0    0.180    0.202   93.242   93.583
 mp2_ri_gpw_compute_in_loop           1  6.0    0.004    0.005   54.884   55.245
 mp2_eri_3c_integrate_gpw           272  7.0    0.152    0.163   41.281   46.362
 get_2c_integrals                     1  6.0    0.008    0.009   37.379   38.167
 integrate_v_rspace                 273  8.0    0.439    0.453   24.707   29.680
 pw_transfer                       6555 10.6    0.374    0.402   27.425   28.076
 fft_wrap_pw1pw2                   5465 11.4    0.045    0.047   26.011   26.598
 grid_integrate_task_list           273  9.0   20.529   25.979   20.529   25.979
 fft_wrap_pw1pw2_100               2178 12.4    1.226    1.389   23.542   24.106
 compute_2c_integrals                 1  7.0    0.002    0.003   19.656   19.680
 compute_2c_integrals_loop_lm         1  8.0    0.003    0.004   18.993   19.363
 mp2_eri_2c_integrate_gpw             1  9.0    2.346    2.419   18.990   19.361
 rpa_ri_compute_en                    1  5.0    0.027    0.033   18.791   18.999
 cp_fm_cholesky_decompose            12  8.2   17.740   18.544   17.740   18.544
 cholesky_decomp                      1  7.0    0.000    0.000   16.562   17.374
 fft3d_s                           5443 13.4   16.156   16.567   16.178   16.587
 ao_to_mo_and_store_B_mult_1        272  7.0   10.765   15.318   10.765   15.318
 calculate_wavefunction             272  8.0    5.395    5.551   12.485   13.123
 rpa_num_int                          1  6.0    0.000    0.002   10.739   10.740
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.708   10.738
 calc_mat_Q                           8  8.0    0.000    0.000    9.485    9.582
 contract_S_to_Q                      8  9.0    0.000    0.000    8.905    9.004
 calc_potential_gpw                 544  9.5    0.005    0.006    8.278    8.642
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.500    8.600
 parallel_gemm_fm_cosma              14 10.1    8.500    8.600    8.500    8.600
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.002    8.280    8.529
 potential_pw2rs                    545 10.0    0.107    0.110    7.728    8.432
 collocate_single_gaussian          272 10.0    0.039    0.041    7.543    7.740
 create_integ_mat                     1  6.0    0.021    0.028    7.588    7.588
 array2fm                             1  7.0    0.000    0.000    6.576    7.023
 pw_scatter_s                      2720 13.7    4.374    4.547    4.374    4.547
 pw_gather_s                       2722 13.2    3.853    4.115    3.853    4.115
 array2fm_buffer_send                 1  8.0    2.902    3.155    2.902    3.155
 pw_poisson_solve                   545 10.5    1.099    1.145    2.223    2.372
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=112.166127, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2800.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          424                      9.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.031    0.040  422.754  422.756
 farming_run                          1  2.0  421.934  421.941  422.706  422.709
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827128576       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788821       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.228063E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              757                 478553.
 MP_Allreduce         2021                  21391.
 MP_Sync                37
 MP_Alltoall            77
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.015    0.033  218.113  218.114
 qs_energies                          1  2.0    0.002    0.015  217.830  217.843
 scf_env_do_scf                       1  3.0    0.000    0.000  115.194  115.195
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  114.248  114.254
 rebuild_ks_matrix                    4  6.0    0.000    0.000  114.247  114.253
 qs_ks_build_kohn_sham_matrix         4  7.0    0.055    0.060  114.247  114.253
 hfx_ks_matrix                        4  8.0    0.001    0.001  113.854  113.858
 integrate_four_center                4  9.0    0.153    0.474  113.853  113.857
 integrate_four_center_main           4 10.0    0.106    0.557  101.871  105.710
 integrate_four_center_bin          264 11.0  101.765  105.451  101.765  105.451
 mp2_main                             1  3.0    0.003    0.032  102.320  102.329
 mp2_gpw_main                         1  4.0    0.055    0.109  101.425  101.440
 init_scf_loop                        1  4.0    0.000    0.000   97.385   97.385
 mp2_ri_gpw_compute_in                1  5.0    0.070    0.109   74.351   75.425
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.002   54.018   55.086
 mp2_eri_3c_integrate_gpw            91  7.0    0.143    0.158   41.741   46.739
 integrate_v_rspace                  95  8.0    0.397    0.570   28.134   32.961
 pw_transfer                       2240 10.6    0.144    0.175   29.960   30.349
 fft_wrap_pw1pw2                   1868 11.4    0.018    0.022   28.975   29.423
 mp2_ri_gpw_compute_en                1  5.0    0.059    0.071   26.869   28.881
 grid_integrate_task_list            95  9.0   23.422   28.449   23.422   28.449
 fft_wrap_pw1pw2_100                730 12.4    1.268    1.441   26.689   27.239
 ao_to_mo_and_store_B_mult_1         91  7.0   10.589   27.020   10.589   27.020
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.842    1.901   25.096   25.106
 get_2c_integrals                     1  6.0    0.000    0.000   20.241   20.269
 compute_2c_integrals                 1  7.0    0.003    0.009   19.217   19.226
 compute_2c_integrals_loop_lm         1  8.0    0.003    0.018   18.866   19.081
 mp2_eri_2c_integrate_gpw             1  9.0    1.737    1.873   18.863   19.081
 fft3d_s                           1823 13.4   18.450   18.961   18.464   18.974
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.001   17.808   17.808
 calculate_wavefunction              91  8.0    2.002    2.033    9.730    9.975
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.558    0.594    8.782    9.320
 potential_pw2rs                    186 10.0    0.033    0.034    8.635    9.127
 local_gemm                         172  8.0    8.224    8.760    8.224    8.760
 mp2_ri_gpw_compute_en_comm          22  7.0    0.496    0.522    8.080    8.567
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.001    8.236    8.513
 calc_potential_gpw                 182  9.5    0.002    0.002    7.914    8.164
 collocate_single_gaussian           91 10.0    0.017    0.029    7.896    8.082
 mp_sync                             37 10.5    3.920    7.912    3.920    7.912
 integrate_four_center_load           4 10.0    0.000    0.000    6.783    6.787
 hfx_load_balance                     1 11.0    0.000    0.000    6.783    6.787
 mp_sendrecv_dm3                   2068  8.0    6.118    6.626    6.118    6.626
 mp2_ri_gpw_compute_en_ener         172  7.0    6.347    6.420    6.347    6.420
 pw_gather_s                        912 13.2    4.926    5.301    4.926    5.301
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=101.418486, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1485.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             452.202496E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62379.
 MP_Allreduce        10329                    270.
 MP_Sync               530
 MP_Alltoall          2083
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.031   56.149   56.151
 qs_mol_dyn_low                       1  2.0    0.003    0.003   55.920   55.930
 qs_forces                           11  3.9    0.002    0.003   55.842   55.843
 qs_energies                         11  4.9    0.001    0.001   54.196   54.206
 scf_env_do_scf                      11  5.9    0.001    0.002   47.780   47.780
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.008   45.052   45.053
 qs_scf_new_mos                     108  7.5    0.000    0.001   32.861   33.145
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   32.861   33.144
 dbcsr_multiply_generic            2286 12.5    0.094    0.098   32.664   33.126
 ot_scf_mini                        108  9.5    0.002    0.002   31.244   31.434
 velocity_verlet                     10  3.0    0.006    0.011   27.964   27.965
 multiply_cannon                   2286 13.5    0.181    0.191   25.581   26.997
 multiply_cannon_loop              2286 14.5    1.829    1.950   24.512   25.932
 ot_mini                            108 10.5    0.001    0.001   18.615   18.879
 qs_ot_get_derivative               108 11.5    0.001    0.001   15.685   15.872
 mp_waitall_1                    245248 16.5    7.957   13.746    7.957   13.746
 multiply_cannon_metrocomm3       54864 15.5    0.073    0.081    5.183   11.748
 multiply_cannon_multrec          54864 15.5    3.728    5.871    7.756   11.118
 rebuild_ks_matrix                  119  8.3    0.000    0.000    9.126    9.292
 qs_ks_build_kohn_sham_matrix       119  9.3    0.010    0.011    9.126    9.292
 qs_ot_get_p                        119 10.4    0.001    0.001    8.096    8.372
 qs_ks_update_qs_env                119  7.6    0.001    0.001    8.040    8.197
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    5.491    5.949
 multiply_cannon_sync_h2d         54864 15.5    5.193    5.783    5.193    5.783
 mp_sum_l                          7287 12.8    3.988    5.742    3.988    5.742
 dbcsr_mm_accdrv_process          76910 16.1    1.815    2.912    3.943    5.669
 qs_rho_update_rho_low              119  7.7    0.001    0.001    5.489    5.579
 calculate_rho_elec                 119  8.7    0.012    0.017    5.489    5.579
 sum_up_and_integrate               119 10.3    0.012    0.014    5.519    5.525
 integrate_v_rspace                 119 11.3    0.002    0.003    5.507    5.515
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    5.190    5.213
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    4.931    5.026
 init_scf_run                        11  5.9    0.000    0.001    5.009    5.009
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    5.009    5.009
 cp_dbcsr_syevd                      50 12.0    0.002    0.003    4.465    4.466
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    4.127    4.128
 cp_fm_redistribute_end              50 14.0    2.112    4.097    2.118    4.100
 cp_fm_diag_elpa_base                50 14.0    1.973    3.971    1.978    3.980
 rs_pw_transfer                     974 11.9    0.011    0.013    3.843    3.978
 density_rs2pw                      119  9.7    0.004    0.004    3.317    3.416
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.986    3.183
 apply_single                       119 13.6    0.000    0.000    2.986    3.182
 calculate_dm_sparse                119  9.5    0.000    0.001    2.926    3.066
 make_m2s                          4572 13.5    0.053    0.056    2.776    2.898
 pw_transfer                       1439 11.6    0.052    0.057    2.772    2.890
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.007    2.696    2.817
 make_images                       4572 14.5    0.134    0.139    2.694    2.814
 potential_pw2rs                    119 12.3    0.004    0.004    2.731    2.787
 acc_transpose_blocks             54864 15.5    0.228    0.257    2.227    2.784
 ot_diis_step                       108 11.5    0.006    0.006    2.699    2.700
 init_scf_loop                       11  6.9    0.000    0.000    2.695    2.695
 jit_kernel_multiply                 13 15.8    2.063    2.676    2.063    2.676
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.509    2.515
 fft3d_ps                          1201 14.6    0.378    0.489    2.345    2.454
 wfi_extrapolate                     11  7.9    0.001    0.001    2.406    2.406
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.363    2.365
 multiply_cannon_metrocomm1       54864 15.5    0.058    0.065    1.373    2.281
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.213    2.256
 fft_wrap_pw1pw2_140                487 13.2    0.179    0.198    2.054    2.178
 grid_integrate_task_list           119 12.3    2.009    2.119    2.009    2.119
 mp_alltoall_d11v                  2130 13.8    1.862    2.045    1.862    2.045
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.971    2.016
 mp_sum_d                          4135 12.0    1.185    1.818    1.185    1.818
 make_images_sizes                 4572 15.5    0.004    0.004    1.257    1.576
 mp_alltoall_i44                   4572 16.5    1.253    1.572    1.253    1.572
 mp_waitany                       12084 13.8    1.419    1.540    1.419    1.540
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.001    1.503    1.524
 mp_alltoall_z22v                  1201 16.6    1.400    1.496    1.400    1.496
 prepare_preconditioner              11  7.9    0.000    0.000    1.430    1.462
 make_preconditioner                 11  8.9    0.000    0.000    1.430    1.462
 acc_transpose_blocks_sync       164592 16.5    1.194    1.454    1.194    1.454
 grid_collocate_task_list           119  9.7    1.356    1.424    1.356    1.424
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.355    1.392
 make_images_data                  4572 15.5    0.040    0.045    0.802    1.198
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.136    1.158
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=56.151000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=431.181818, yerr=1.113404
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             487.489536E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10306                    303.
 MP_Sync                54
 MP_Alltoall          2060                 583589.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.014    0.028   40.284   40.285
 qs_mol_dyn_low                       1  2.0    0.003    0.003   40.086   40.096
 qs_forces                           11  3.9    0.002    0.003   40.020   40.021
 qs_energies                         11  4.9    0.001    0.007   38.283   38.287
 scf_env_do_scf                      11  5.9    0.000    0.001   32.962   32.963
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   30.355   30.355
 dbcsr_multiply_generic            2286 12.5    0.102    0.106   22.441   22.840
 qs_scf_new_mos                     108  7.5    0.001    0.001   20.999   21.219
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   20.998   21.218
 ot_scf_mini                        108  9.5    0.002    0.003   20.068   20.223
 velocity_verlet                     10  3.0    0.001    0.002   19.096   19.097
 multiply_cannon                   2286 13.5    0.208    0.217   17.218   18.734
 multiply_cannon_loop              2286 14.5    1.194    1.252   16.025   17.597
 ot_mini                            108 10.5    0.001    0.001   12.382   12.608
 mp_waitall_1                    200699 16.5    5.812   10.996    5.812   10.996
 qs_ot_get_derivative               108 11.5    0.001    0.001   10.004   10.164
 multiply_cannon_metrocomm3       27432 15.5    0.071    0.074    4.189    9.649
 multiply_cannon_multrec          27432 15.5    1.814    4.224    6.256    9.171
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.375    7.528
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    7.375    7.527
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.495    6.634
 dbcsr_mm_accdrv_process          47894 16.0    3.607    6.291    4.364    6.495
 qs_ot_get_p                        119 10.4    0.001    0.001    4.735    4.957
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.843    4.692
 sum_up_and_integrate               119 10.3    0.025    0.028    4.266    4.272
 integrate_v_rspace                 119 11.3    0.002    0.003    4.241    4.248
 mp_sum_l                          7287 12.8    2.096    4.156    2.096    4.156
 init_scf_run                        11  5.9    0.000    0.001    4.120    4.121
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    4.120    4.121
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.010    4.088
 apply_single                       119 13.6    0.000    0.000    3.010    4.087
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.884    3.921
 calculate_rho_elec                 119  8.7    0.021    0.024    3.883    3.921
 qs_ot_p2m_diag                      50 11.0    0.008    0.012    3.047    3.069
 rs_pw_transfer                     974 11.9    0.010    0.011    2.652    3.047
 make_m2s                          4572 13.5    0.052    0.053    2.676    3.011
 make_images                       4572 14.5    0.205    0.245    2.589    2.927
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.643    2.644
 init_scf_loop                       11  6.9    0.000    0.002    2.588    2.588
 calculate_first_density_matrix       1  7.0    0.003    0.021    2.554    2.557
 density_rs2pw                      119  9.7    0.004    0.004    2.141    2.555
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.372    2.456
 ot_diis_step                       108 11.5    0.011    0.011    2.327    2.328
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.311    2.312
 cp_fm_redistribute_end              50 14.0    1.172    2.285    1.175    2.287
 jit_kernel_multiply                 11 16.2    0.697    2.278    0.697    2.278
 calculate_dm_sparse                119  9.5    0.000    0.001    2.175    2.255
 cp_fm_diag_elpa_base                50 14.0    1.079    2.192    1.108    2.229
 multiply_cannon_sync_h2d         27432 15.5    1.676    2.199    1.676    2.199
 pw_transfer                       1439 11.6    0.066    0.070    1.963    2.005
 acc_transpose_blocks             27432 15.5    0.114    0.120    1.660    2.005
 potential_pw2rs                    119 12.3    0.006    0.006    1.974    1.983
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.968    1.970
 grid_integrate_task_list           119 12.3    1.840    1.937    1.840    1.937
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.871    1.914
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.752    1.797
 prepare_preconditioner              11  7.9    0.000    0.000    1.647    1.674
 make_preconditioner                 11  8.9    0.000    0.000    1.647    1.674
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.548    1.606
 make_images_data                  4572 15.5    0.047    0.053    1.261    1.599
 fft_wrap_pw1pw2_140                487 13.2    0.203    0.213    1.474    1.517
 wfi_extrapolate                     11  7.9    0.001    0.001    1.506    1.506
 hybrid_alltoall_any               4725 16.4    0.053    0.116    1.124    1.492
 fft3d_ps                          1201 14.6    0.520    0.576    1.421    1.454
 grid_collocate_task_list           119  9.7    1.282    1.422    1.282    1.422
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.355    1.365
 mp_alltoall_d11v                  2130 13.8    1.227    1.356    1.227    1.356
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.230    1.274
 mp_sum_d                          4135 12.0    0.566    1.040    0.566    1.040
 mp_allgather_i34                  2286 14.5    0.617    1.026    0.617    1.026
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.937    0.951
 acc_transpose_blocks_sync        82296 16.5    0.818    0.938    0.818    0.938
 rs_pw_transfer_RS2PW_140           130 11.5    0.140    0.150    0.536    0.934
 qs_energies_init_hamiltonians       11  5.9    0.000    0.001    0.930    0.931
 mp_waitany                        5720 13.7    0.509    0.924    0.509    0.924
 acc_transpose_blocks_kernels     27432 16.5    0.190    0.282    0.702    0.921
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=40.285000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=464.454545, yerr=1.233151
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             518.266880E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63490.
 MP_Allreduce        10155                    305.
 MP_Sync                54
 MP_Alltoall          1821                1607820.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.042   34.076   34.077
 qs_mol_dyn_low                       1  2.0    0.003    0.003   33.804   33.814
 qs_forces                           11  3.9    0.003    0.007   33.736   33.737
 qs_energies                         11  4.9    0.001    0.005   32.100   32.103
 scf_env_do_scf                      11  5.9    0.001    0.001   27.340   27.340
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   24.726   24.727
 dbcsr_multiply_generic            2286 12.5    0.097    0.100   17.375   17.498
 velocity_verlet                     10  3.0    0.001    0.002   16.234   16.236
 qs_scf_new_mos                     108  7.5    0.001    0.001   16.154   16.174
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   16.153   16.173
 ot_scf_mini                        108  9.5    0.002    0.003   15.363   15.380
 multiply_cannon                   2286 13.5    0.193    0.199   13.775   14.550
 multiply_cannon_loop              2286 14.5    0.867    0.919   12.952   13.870
 ot_mini                            108 10.5    0.001    0.001    9.390    9.414
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.870    7.887
 multiply_cannon_multrec          18288 15.5    1.865    2.851    6.932    7.224
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.624    6.656
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.624    6.656
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.828    5.858
 dbcsr_mm_accdrv_process          38222 16.0    4.917    5.730    4.976    5.798
 mp_waitall_1                    158411 16.6    2.926    4.298    2.926    4.298
 sum_up_and_integrate               119 10.3    0.030    0.031    4.161    4.168
 integrate_v_rspace                 119 11.3    0.003    0.003    4.131    4.141
 qs_ot_get_p                        119 10.4    0.001    0.001    3.661    3.690
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.682    3.690
 calculate_rho_elec                 119  8.7    0.031    0.032    3.682    3.689
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.956    3.568
 init_scf_run                        11  5.9    0.000    0.001    3.549    3.549
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    3.548    3.549
 multiply_cannon_metrocomm3       18288 15.5    0.047    0.048    1.537    2.796
 rs_pw_transfer                     974 11.9    0.009    0.010    2.527    2.781
 init_scf_loop                       11  6.9    0.000    0.002    2.596    2.599
 qs_ot_p2m_diag                      50 11.0    0.012    0.018    2.419    2.428
 density_rs2pw                      119  9.7    0.004    0.004    2.161    2.426
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.029    2.375
 apply_single                       119 13.6    0.000    0.000    2.029    2.375
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.252    2.254
 make_m2s                          4572 13.5    0.044    0.045    1.966    2.133
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.114    2.116
 pw_transfer                       1439 11.6    0.065    0.069    2.043    2.062
 make_images                       4572 14.5    0.193    0.205    1.881    2.048
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.950    1.970
 potential_pw2rs                    119 12.3    0.007    0.008    1.907    1.918
 calculate_dm_sparse                119  9.5    0.000    0.001    1.878    1.890
 grid_integrate_task_list           119 12.3    1.799    1.882    1.799    1.882
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.825    1.833
 cp_fm_diag_elpa_base                50 14.0    1.801    1.811    1.823    1.832
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.818    1.828
 acc_transpose_blocks             18288 15.5    0.079    0.080    1.644    1.767
 prepare_preconditioner              11  7.9    0.000    0.000    1.755    1.758
 make_preconditioner                 11  8.9    0.000    0.003    1.755    1.758
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.700    1.702
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.619    1.698
 mp_sum_l                          7287 12.8    1.246    1.632    1.246    1.632
 fft_wrap_pw1pw2_140                487 13.2    0.255    0.260    1.526    1.543
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.001    1.529    1.539
 ot_diis_step                       108 11.5    0.011    0.011    1.494    1.495
 fft3d_ps                          1201 14.6    0.532    0.548    1.436    1.457
 grid_collocate_task_list           119  9.7    1.242    1.401    1.242    1.401
 wfi_extrapolate                     11  7.9    0.001    0.001    1.242    1.242
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.185    1.190
 multiply_cannon_sync_h2d         18288 15.5    0.990    1.152    0.990    1.152
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.021    1.036
 make_images_data                  4572 15.5    0.047    0.050    0.852    1.023
 qs_energies_init_hamiltonians       11  5.9    0.000    0.002    0.975    0.976
 hybrid_alltoall_any               4725 16.4    0.058    0.115    0.733    0.922
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.868    0.871
 mp_alltoall_d11v                  2130 13.8    0.715    0.869    0.715    0.869
 acc_transpose_blocks_sync        54864 16.5    0.756    0.869    0.756    0.869
 mp_waitany                        9880 13.7    0.557    0.836    0.557    0.836
 mp_alltoall_z22v                  1201 16.6    0.743    0.816    0.743    0.816
 rs_pw_transfer_RS2PW_140           130 11.5    0.118    0.122    0.536    0.800
 acc_transpose_blocks_kernels     18288 16.5    0.221    0.227    0.790    0.798
 cp_fm_cholesky_invert               11 10.9    0.744    0.748    0.744    0.748
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    0.681    0.734
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=34.077000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=494.181818, yerr=1.336085
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             555.470848E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63489.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.020    0.033   38.790   38.791
 qs_mol_dyn_low                       1  2.0    0.003    0.004   38.568   38.578
 qs_forces                           11  3.9    0.003    0.006   38.488   38.490
 qs_energies                         11  4.9    0.002    0.006   36.708   36.712
 scf_env_do_scf                      11  5.9    0.001    0.001   31.505   31.506
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   27.780   27.781
 dbcsr_multiply_generic            2286 12.5    0.100    0.104   20.327   20.453
 velocity_verlet                     10  3.0    0.001    0.002   19.743   19.746
 qs_scf_new_mos                     108  7.5    0.001    0.001   18.700   18.751
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   18.700   18.750
 ot_scf_mini                        108  9.5    0.002    0.003   17.643   17.695
 multiply_cannon                   2286 13.5    0.217    0.227   16.216   16.600
 multiply_cannon_loop              2286 14.5    1.557    1.632   15.202   15.515
 ot_mini                            108 10.5    0.001    0.001   10.647   10.708
 multiply_cannon_multrec          27432 15.5    2.453    3.132    9.035    9.310
 qs_ot_get_derivative               108 11.5    0.001    0.001    8.760    8.813
 dbcsr_mm_accdrv_process          47916 15.9    6.155    7.774    6.482    7.837
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.036    7.081
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    7.036    7.081
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.250    6.291
 qs_ot_get_p                        119 10.4    0.001    0.001    4.105    4.178
 sum_up_and_integrate               119 10.3    0.035    0.037    4.143    4.154
 integrate_v_rspace                 119 11.3    0.003    0.003    4.108    4.119
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.818    3.859
 calculate_rho_elec                 119  8.7    0.040    0.046    3.817    3.858
 init_scf_run                        11  5.9    0.000    0.001    3.785    3.786
 scf_env_initial_rho_setup           11  6.9    0.001    0.004    3.785    3.786
 init_scf_loop                       11  6.9    0.001    0.003    3.703    3.706
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.187    3.645
 prepare_preconditioner              11  7.9    0.000    0.000    2.775    2.784
 make_preconditioner                 11  8.9    0.000    0.001    2.775    2.783
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.366    2.700
 make_m2s                          4572 13.5    0.054    0.056    2.559    2.672
 rs_pw_transfer                     974 11.9    0.008    0.009    2.431    2.633
 mp_waitall_1                    137007 16.6    2.095    2.622    2.095    2.622
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.179    2.613
 apply_single                       119 13.6    0.000    0.000    2.179    2.612
 make_images                       4572 14.5    0.272    0.331    2.451    2.562
 qs_ot_p2m_diag                      50 11.0    0.015    0.022    2.512    2.523
 acc_transpose_blocks             27432 15.5    0.119    0.122    2.435    2.510
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.471    2.496
 density_rs2pw                      119  9.7    0.004    0.004    2.167    2.363
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.270    2.274
 calculate_dm_sparse                119  9.5    0.000    0.000    2.177    2.236
 pw_transfer                       1439 11.6    0.066    0.069    2.152    2.192
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.059    2.102
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.097    2.098
 grid_integrate_task_list           119 12.3    1.829    1.925    1.829    1.925
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.877    1.879
 potential_pw2rs                    119 12.3    0.008    0.009    1.865    1.877
 ot_diis_step                       108 11.5    0.012    0.012    1.842    1.843
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.769    1.780
 cp_fm_diag_elpa_base                50 14.0    1.731    1.752    1.767    1.778
 fft_wrap_pw1pw2_140                487 13.2    0.290    0.302    1.652    1.698
 mp_sum_l                          7287 12.8    1.125    1.688    1.125    1.688
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.667    1.680
 acc_transpose_blocks_sync        82296 16.5    1.462    1.533    1.462    1.533
 fft3d_ps                          1201 14.6    0.556    0.611    1.494    1.524
 wfi_extrapolate                     11  7.9    0.001    0.001    1.454    1.454
 grid_collocate_task_list           119  9.7    1.249    1.371    1.249    1.371
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.325    1.345
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.255    1.267
 jit_kernel_multiply                  7 16.0    0.260    1.241    0.260    1.241
 cp_fm_upper_to_full                 72 14.2    0.843    1.199    0.843    1.199
 multiply_cannon_metrocomm3       27432 15.5    0.040    0.041    0.685    1.187
 make_images_data                  4572 15.5    0.047    0.051    0.994    1.137
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.129    1.132
 dbcsr_complete_redistribute        329 12.2    0.127    0.159    0.853    1.128
 hybrid_alltoall_any               4725 16.4    0.065    0.153    0.891    1.074
 mp_alltoall_d11v                  2130 13.8    0.872    0.997    0.872    0.997
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.878    0.882
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    0.801    0.879
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.610    0.879
 mp_alltoall_z22v                  1201 16.6    0.821    0.850    0.821    0.850
 acc_transpose_blocks_kernels     27432 16.5    0.275    0.282    0.825    0.840
 cp_fm_cholesky_invert               11 10.9    0.812    0.817    0.812    0.817
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=38.791000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=528.272727, yerr=3.413633
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             616.525824E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63488.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.035   30.272   30.275
 qs_mol_dyn_low                       1  2.0    0.003    0.003   30.088   30.096
 qs_forces                           11  3.9    0.002    0.002   30.027   30.028
 qs_energies                         11  4.9    0.001    0.001   28.278   28.280
 scf_env_do_scf                      11  5.9    0.000    0.001   23.501   23.501
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   20.837   20.837
 velocity_verlet                     10  3.0    0.002    0.002   15.780   15.782
 dbcsr_multiply_generic            2286 12.5    0.093    0.097   13.419   13.500
 qs_scf_new_mos                     108  7.5    0.001    0.001   12.422   12.453
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   12.421   12.452
 ot_scf_mini                        108  9.5    0.002    0.002   11.684   11.711
 multiply_cannon                   2286 13.5    0.224    0.230   10.521   11.060
 multiply_cannon_loop              2286 14.5    0.650    0.669    9.542    9.718
 ot_mini                            108 10.5    0.001    0.001    6.596    6.625
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.201    6.220
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    6.201    6.220
 multiply_cannon_multrec           9144 15.5    1.657    1.856    5.919    6.177
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.531    5.548
 qs_ot_get_derivative               108 11.5    0.001    0.001    5.261    5.287
 dbcsr_mm_accdrv_process          12550 15.8    3.425    4.195    4.155    4.238
 sum_up_and_integrate               119 10.3    0.037    0.041    3.815    3.822
 integrate_v_rspace                 119 11.3    0.003    0.003    3.777    3.784
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.758    3.764
 calculate_rho_elec                 119  8.7    0.060    0.061    3.758    3.764
 init_scf_run                        11  5.9    0.000    0.001    3.314    3.314
 scf_env_initial_rho_setup           11  6.9    0.000    0.000    3.314    3.314
 qs_ot_get_p                        119 10.4    0.001    0.001    3.119    3.159
 init_scf_loop                       11  6.9    0.000    0.000    2.641    2.642
 pw_transfer                       1439 11.6    0.066    0.069    2.169    2.181
 density_rs2pw                      119  9.7    0.004    0.004    2.025    2.178
 mp_waitall_1                    115863 16.7    1.661    2.172    1.661    2.172
 make_m2s                          4572 13.5    0.034    0.035    2.029    2.163
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.096    2.097
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.075    2.089
 make_images                       4572 14.5    0.269    0.299    1.940    2.072
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    2.038    2.040
 rs_pw_transfer                     974 11.9    0.008    0.008    1.868    2.009
 grid_integrate_task_list           119 12.3    1.878    1.931    1.878    1.931
 prepare_preconditioner              11  7.9    0.000    0.000    1.889    1.892
 make_preconditioner                 11  8.9    0.000    0.000    1.889    1.892
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.856    1.882
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.777    1.802
 calculate_dm_sparse                119  9.5    0.000    0.000    1.770    1.788
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.781    1.782
 fft_wrap_pw1pw2_140                487 13.2    0.367    0.377    1.671    1.686
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.559    1.560
 potential_pw2rs                    119 12.3    0.010    0.011    1.504    1.508
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.448    1.460
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.446    1.454
 cp_fm_diag_elpa_base                50 14.0    1.418    1.437    1.444    1.452
 acc_transpose_blocks              9144 15.5    0.042    0.043    1.426    1.451
 grid_collocate_task_list           119  9.7    1.303    1.426    1.303    1.426
 fft3d_ps                          1201 14.6    0.563    0.578    1.405    1.419
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.375    1.382
 ot_diis_step                       108 11.5    0.013    0.013    1.320    1.320
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.257    1.286
 apply_single                       119 13.6    0.000    0.000    1.256    1.286
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.233    1.234
 wfi_extrapolate                     11  7.9    0.001    0.001    1.160    1.160
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.131    1.137
 hybrid_alltoall_any               4725 16.4    0.065    0.176    0.910    1.127
 make_images_data                  4572 15.5    0.042    0.045    0.918    1.091
 jit_kernel_multiply                  6 15.9    0.691    1.025    0.691    1.025
 mp_alltoall_d11v                  2130 13.8    0.885    0.980    0.885    0.980
 cp_fm_cholesky_invert               11 10.9    0.959    0.961    0.959    0.961
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.876    0.928
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.863    0.868
 mp_allgather_i34                  2286 14.5    0.293    0.798    0.293    0.798
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.793    0.796
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.021    0.343    0.756
 acc_transpose_blocks_sync        27432 16.5    0.726    0.753    0.726    0.753
 mp_alltoall_z22v                  1201 16.6    0.716    0.745    0.716    0.745
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    0.679    0.726
 acc_transpose_blocks_kernels      9144 16.5    0.119    0.121    0.642    0.649
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=30.275000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=580.818182, yerr=7.055857
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             786.735104E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63723.
 MP_Allreduce        10154                    429.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.035   43.226   43.227
 qs_mol_dyn_low                       1  2.0    0.003    0.003   43.023   43.032
 qs_forces                           11  3.9    0.002    0.002   42.953   42.954
 qs_energies                         11  4.9    0.001    0.001   40.935   40.939
 scf_env_do_scf                      11  5.9    0.001    0.001   35.143   35.143
 scf_env_do_scf_inner_loop          108  6.5    0.004    0.007   27.256   27.257
 velocity_verlet                     10  3.0    0.002    0.002   24.460   24.466
 dbcsr_multiply_generic            2286 12.5    0.100    0.103   19.050   19.221
 qs_scf_new_mos                     108  7.5    0.001    0.001   17.325   17.418
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   17.324   17.417
 ot_scf_mini                        108  9.5    0.002    0.002   16.166   16.265
 multiply_cannon                   2286 13.5    0.298    0.304   15.076   15.989
 multiply_cannon_loop              2286 14.5    0.869    0.900   13.790   14.729
 ot_mini                            108 10.5    0.001    0.001    9.821    9.940
 multiply_cannon_multrec           9144 15.5    3.416    4.875    8.768    8.849
 init_scf_loop                       11  6.9    0.000    0.000    7.860    7.861
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.732    7.832
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.438    7.584
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    7.438    7.583
 prepare_preconditioner              11  7.9    0.000    0.000    6.865    6.880
 make_preconditioner                 11  8.9    0.000    0.000    6.865    6.880
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.695    6.827
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.416    6.757
 dbcsr_mm_accdrv_process          12550 15.8    4.599    6.482    5.219    6.537
 cp_fm_upper_to_full                 72 14.2    3.161    4.552    3.161    4.552
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.392    4.453
 calculate_rho_elec                 119  8.7    0.118    0.121    4.392    4.453
 sum_up_and_integrate               119 10.3    0.065    0.066    4.098    4.104
 integrate_v_rspace                 119 11.3    0.004    0.004    4.032    4.038
 init_scf_run                        11  5.9    0.000    0.001    3.727    3.727
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    3.727    3.727
 qs_ot_get_p                        119 10.4    0.001    0.001    3.391    3.529
 mp_waitall_1                     94719 16.7    2.386    3.391    2.386    3.391
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.818    3.236
 dbcsr_complete_redistribute        329 12.2    0.288    0.292    2.011    2.831
 pw_transfer                       1439 11.6    0.069    0.069    2.706    2.710
 fft_wrap_pw1pw2                   1201 12.6    0.009    0.009    2.607    2.612
 make_m2s                          4572 13.5    0.037    0.037    2.393    2.568
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.260    2.549
 apply_single                       119 13.6    0.000    0.000    2.260    2.549
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.683    2.502
 make_images                       4572 14.5    0.353    0.386    2.274    2.449
 calculate_dm_sparse                119  9.5    0.000    0.000    2.292    2.341
 mp_alltoall_i22                    627 13.8    1.427    2.282    1.427    2.282
 multiply_cannon_metrocomm3        9144 15.5    0.021    0.021    1.366    2.280
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.444    2.256
 density_rs2pw                      119  9.7    0.004    0.004    2.208    2.251
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.154    2.212
 fft_wrap_pw1pw2_140                487 13.2    0.620    0.623    2.199    2.204
 grid_integrate_task_list           119 12.3    2.095    2.132    2.095    2.132
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.068    2.120
 ot_diis_step                       108 11.5    0.014    0.014    2.062    2.063
 qs_ot_p2m_diag                      50 11.0    0.042    0.043    2.004    2.007
 acc_transpose_blocks              9144 15.5    0.044    0.045    1.814    1.864
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.818    1.819
 mp_sum_l                          7287 12.8    1.034    1.792    1.034    1.792
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.775    1.776
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.648    1.649
 fft3d_ps                          1201 14.6    0.595    0.605    1.619    1.626
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.554    1.592
 grid_collocate_task_list           119  9.7    1.534    1.565    1.534    1.565
 rs_pw_transfer                     974 11.9    0.009    0.009    1.512    1.554
 cp_fm_cholesky_invert               11 10.9    1.463    1.466    1.463    1.466
 wfi_extrapolate                     11  7.9    0.001    0.001    1.448    1.449
 potential_pw2rs                    119 12.3    0.014    0.014    1.420    1.423
 hybrid_alltoall_any               4725 16.4    0.090    0.149    1.101    1.375
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.302    1.355
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.354    1.354
 cp_fm_diag_elpa_base                50 14.0    1.208    1.262    1.352    1.352
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.312    1.331
 mp_alltoall_d11v                  2130 13.8    1.197    1.285    1.197    1.285
 make_images_data                  4572 15.5    0.045    0.048    1.043    1.267
 acc_transpose_blocks_sync        27432 16.5    1.101    1.147    1.101    1.147
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.106    1.126
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    0.992    1.027
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.953    0.967
 qs_create_task_list                 11  7.9    0.000    0.000    0.931    0.944
 generate_qs_task_list               11  8.9    0.367    0.387    0.931    0.944
 mp_alltoall_z22v                  1201 16.6    0.888    0.904    0.888    0.904
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=43.227000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=744.545455, yerr=14.278806
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             499.875840E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66212.
 MP_Allreduce         9776                    488.
 MP_Sync                52
 MP_Alltoall          1938                 979029.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.046    0.118   82.708   82.708
 qs_mol_dyn_low                       1  2.0    0.006    0.007   82.341   82.352
 qs_forces                           11  3.9    0.003    0.004   82.236   82.236
 qs_energies                         11  4.9    0.001    0.001   79.312   79.330
 scf_env_do_scf                      11  5.9    0.001    0.001   70.385   70.387
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   64.849   64.849
 dbcsr_multiply_generic            2055 12.4    0.108    0.111   51.121   51.453
 qs_scf_new_mos                      99  7.5    0.000    0.001   47.388   47.506
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   47.387   47.505
 ot_scf_mini                         99  9.5    0.002    0.002   44.994   45.110
 velocity_verlet                     10  3.0    0.001    0.002   43.640   43.640
 multiply_cannon                   2055 13.4    0.178    0.182   42.623   43.434
 multiply_cannon_loop              2055 14.4    1.811    1.858   41.649   42.325
 ot_mini                             99 10.5    0.001    0.001   26.676   26.771
 qs_ot_get_derivative                99 11.5    0.001    0.001   19.856   19.935
 multiply_cannon_multrec          49320 15.4   11.358   12.013   17.343   18.070
 rebuild_ks_matrix                  110  8.3    0.000    0.000   14.649   14.746
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.012   14.648   14.746
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.807   12.896
 mp_waitall_1                    220248 16.4   10.883   11.984   10.883   11.984
 multiply_cannon_sync_h2d         49320 15.4    9.530   10.061    9.530   10.061
 qs_ot_get_p                        110 10.4    0.001    0.001    9.789    9.934
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    7.617    8.173
 multiply_cannon_metrocomm3       49320 15.4    0.085    0.089    6.597    7.799
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.229    7.703
 apply_single                       110 13.6    0.000    0.000    7.229    7.703
 sum_up_and_integrate               110 10.3    0.036    0.042    7.134    7.147
 integrate_v_rspace                 110 11.3    0.003    0.004    7.098    7.119
 init_scf_run                        11  5.9    0.000    0.001    6.804    6.805
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    6.804    6.805
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.692    6.803
 calculate_rho_elec                 110  8.6    0.021    0.026    6.691    6.803
 qs_ot_p2m_diag                      48 11.0    0.012    0.018    6.557    6.599
 ot_diis_step                        99 11.5    0.005    0.006    6.585    6.585
 dbcsr_mm_accdrv_process          87628 16.1    3.038    3.148    5.855    6.111
 cp_dbcsr_syevd                      48 12.0    0.002    0.003    5.686    5.687
 init_scf_loop                       11  6.9    0.000    0.000    5.509    5.510
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    5.338    5.388
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    5.113    5.139
 cp_fm_diag_elpa_base                48 14.0    5.100    5.126    5.111    5.138
 mp_sum_l                          6594 12.7    4.098    4.788    4.098    4.788
 rs_pw_transfer                     902 11.9    0.012    0.014    3.696    4.306
 wfi_extrapolate                     11  7.9    0.001    0.001    4.077    4.077
 density_rs2pw                      110  9.6    0.004    0.005    3.408    4.056
 make_m2s                          4110 13.4    0.060    0.065    3.933    4.043
 make_images                       4110 14.4    0.177    0.190    3.837    3.951
 calculate_dm_sparse                110  9.5    0.001    0.001    3.770    3.875
 multiply_cannon_metrocomm1       49320 15.4    0.067    0.070    2.447    3.689
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.532    3.536
 grid_integrate_task_list           110 12.3    3.255    3.408    3.255    3.408
 prepare_preconditioner              11  7.9    0.000    0.000    3.348    3.374
 make_preconditioner                 11  8.9    0.000    0.000    3.348    3.374
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    3.229    3.298
 pw_transfer                       1331 11.6    0.055    0.070    3.195    3.275
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.195    3.250
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.150    3.193
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.106    3.188
 fft_wrap_pw1pw2_140                451 13.1    0.452    0.497    2.650    2.741
 potential_pw2rs                    110 12.3    0.006    0.007    2.639    2.660
 acc_transpose_blocks             49320 15.4    0.212    0.220    2.582    2.649
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.622    2.628
 mp_alltoall_d11v                  2046 13.8    2.071    2.567    2.071    2.567
 mp_waitany                       14300 13.8    1.831    2.562    1.831    2.562
 jit_kernel_multiply                 13 15.9    2.538    2.550    2.538    2.550
 grid_collocate_task_list           110  9.6    2.155    2.392    2.155    2.392
 fft3d_ps                          1111 14.6    0.792    0.877    2.301    2.363
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.971    1.981
 make_images_data                  4110 15.4    0.044    0.048    1.778    1.925
 cp_fm_cholesky_invert               11 10.9    1.878    1.881    1.878    1.881
 mp_sum_d                          3889 11.9    1.359    1.868    1.359    1.868
 hybrid_alltoall_any               4261 16.3    0.083    0.483    1.536    1.817
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.663    1.704
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=82.708000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=475.636364, yerr=1.822722
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             591.544320E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66430.
 MP_Allreduce         9775                    566.
 MP_Sync                52
 MP_Alltoall          1717                2714709.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.045    0.093   71.367   71.380
 qs_mol_dyn_low                       1  2.0    0.004    0.007   70.639   70.651
 qs_forces                           11  3.9    0.004    0.011   70.561   70.563
 qs_energies                         11  4.9    0.016    0.056   67.144   67.148
 scf_env_do_scf                      11  5.9    0.001    0.001   58.267   58.270
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   50.358   50.360
 dbcsr_multiply_generic            2055 12.4    0.118    0.123   38.322   38.535
 velocity_verlet                     10  3.0    0.001    0.002   37.072   37.074
 qs_scf_new_mos                      99  7.5    0.001    0.001   33.763   33.901
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   33.763   33.900
 multiply_cannon                   2055 13.4    0.223    0.244   31.496   32.520
 ot_scf_mini                         99  9.5    0.003    0.004   32.077   32.212
 multiply_cannon_loop              2055 14.4    1.165    1.191   30.178   30.921
 ot_mini                             99 10.5    0.001    0.001   18.696   18.822
 multiply_cannon_multrec          24660 15.4    6.974    8.356   14.036   15.298
 rebuild_ks_matrix                  110  8.3    0.000    0.000   13.977   14.055
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.014   13.976   14.055
 qs_ot_get_derivative                99 11.5    0.001    0.002   12.850   12.979
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.271   12.342
 mp_waitall_1                    176588 16.5    7.792   10.145    7.792   10.145
 init_scf_loop                       11  6.9    0.001    0.004    7.873    7.877
 multiply_cannon_metrocomm3       24660 15.4    0.072    0.074    5.260    7.555
 multiply_cannon_sync_h2d         24660 15.4    6.366    7.527    6.366    7.527
 dbcsr_mm_accdrv_process          52282 16.1    5.515    6.299    6.894    7.234
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.534    7.199
 apply_single                       110 13.6    0.000    0.001    6.533    7.199
 qs_ot_get_p                        110 10.4    0.001    0.001    6.663    6.824
 sum_up_and_integrate               110 10.3    0.053    0.059    6.759    6.773
 integrate_v_rspace                 110 11.3    0.003    0.003    6.706    6.721
 init_scf_run                        11  5.9    0.000    0.001    6.404    6.405
 scf_env_initial_rho_setup           11  6.9    0.000    0.002    6.404    6.405
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.338    6.356
 calculate_rho_elec                 110  8.6    0.040    0.048    6.337    6.355
 prepare_preconditioner              11  7.9    0.000    0.000    5.802    5.822
 make_preconditioner                 11  8.9    0.000    0.001    5.802    5.822
 ot_diis_step                        99 11.5    0.010    0.012    5.792    5.792
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.388    5.540
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    4.799    5.521
 make_m2s                          4110 13.4    0.056    0.059    4.344    4.807
 qs_ot_p2m_diag                      48 11.0    0.028    0.044    4.741    4.763
 make_images                       4110 14.4    0.407    0.466    4.236    4.695
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    4.261    4.261
 density_rs2pw                      110  9.6    0.004    0.005    3.389    3.872
 pw_transfer                       1331 11.6    0.067    0.077    3.725    3.864
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.009    3.618    3.760
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.680    3.694
 cp_fm_diag_elpa_base                48 14.0    3.631    3.650    3.678    3.691
 rs_pw_transfer                     902 11.9    0.012    0.014    3.098    3.589
 wfi_extrapolate                     11  7.9    0.001    0.001    3.563    3.563
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.391    3.473
 grid_integrate_task_list           110 12.3    3.158    3.331    3.158    3.331
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.262    3.264
 fft_wrap_pw1pw2_140                451 13.1    0.523    0.544    3.070    3.212
 calculate_dm_sparse                110  9.5    0.001    0.001    2.996    3.027
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.923    2.956
 hybrid_alltoall_any               4261 16.3    0.105    0.451    2.095    2.884
 make_images_data                  4110 15.4    0.049    0.053    2.387    2.868
 fft3d_ps                          1111 14.6    1.109    1.335    2.587    2.742
 calculate_first_density_matrix       1  7.0    0.001    0.004    2.739    2.741
 cp_fm_cholesky_invert               11 10.9    2.715    2.722    2.715    2.722
 mp_sum_l                          6594 12.7    1.992    2.654    1.992    2.654
 potential_pw2rs                    110 12.3    0.008    0.009    2.517    2.537
 grid_collocate_task_list           110  9.6    2.101    2.535    2.101    2.535
 mp_alltoall_d11v                  2046 13.8    1.821    2.059    1.821    2.059
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    2.023    2.036
 acc_transpose_blocks             24660 15.4    0.116    0.120    1.978    2.007
 qs_energies_init_hamiltonians       11  5.9    0.000    0.002    1.906    1.908
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.864    1.873
 multiply_cannon_metrocomm4       22605 15.4    0.079    0.083    0.774    1.825
 jit_kernel_multiply                  9 16.4    1.015    1.795    1.015    1.795
 mp_waitany                       10164 13.8    1.275    1.775    1.275    1.775
 mp_allgather_i34                  2055 14.4    0.629    1.690    0.629    1.690
 mp_irecv_dv                      57340 16.2    0.645    1.600    0.645    1.600
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.546    1.557
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.392    1.495
 rs_pw_transfer_RS2PW_140           121 11.5    0.208    0.216    0.993    1.482
 dbcsr_complete_redistribute        325 12.2    0.248    0.316    1.177    1.450
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=71.380000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=558.727273, yerr=6.810298
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             665.108480E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66421.
 MP_Allreduce         9774                    562.
 MP_Sync                52
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.040   63.193   63.194
 qs_mol_dyn_low                       1  2.0    0.003    0.004   62.840   62.852
 qs_forces                           11  3.9    0.004    0.014   62.765   62.767
 qs_energies                         11  4.9    0.002    0.007   59.441   59.448
 scf_env_do_scf                      11  5.9    0.001    0.001   51.213   51.213
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.008   42.315   42.316
 velocity_verlet                     10  3.0    0.002    0.003   34.287   34.289
 dbcsr_multiply_generic            2055 12.4    0.109    0.116   29.745   30.118
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.671   26.784
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.671   26.783
 ot_scf_mini                         99  9.5    0.002    0.003   25.385   25.512
 multiply_cannon                   2055 13.4    0.213    0.221   22.877   24.203
 multiply_cannon_loop              2055 14.4    0.819    0.872   21.618   23.055
 ot_mini                             99 10.5    0.001    0.001   14.379   14.516
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.806   12.982
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.014   12.806   12.981
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.233   11.391
 mp_waitall_1                    139946 16.5    7.696   10.780    7.696   10.780
 multiply_cannon_multrec          16440 15.4    3.452    4.275    9.631   10.355
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.770    9.900
 init_scf_loop                       11  6.9    0.001    0.003    8.860    8.863
 multiply_cannon_metrocomm3       16440 15.4    0.045    0.047    4.845    7.876
 prepare_preconditioner              11  7.9    0.000    0.000    7.023    7.050
 make_preconditioner                 11  8.9    0.000    0.001    7.023    7.050
 sum_up_and_integrate               110 10.3    0.061    0.062    6.812    6.827
 integrate_v_rspace                 110 11.3    0.003    0.003    6.751    6.766
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.349    6.697
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.296    6.314
 calculate_rho_elec                 110  8.6    0.059    0.060    6.296    6.314
 dbcsr_mm_accdrv_process          34862 16.1    5.330    5.761    6.028    6.173
 qs_ot_get_p                        110 10.4    0.001    0.002    5.988    6.152
 init_scf_run                        11  5.9    0.000    0.001    5.712    5.713
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    5.712    5.712
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    5.077    5.582
 apply_single                       110 13.6    0.000    0.000    5.077    5.582
 make_m2s                          4110 13.4    0.051    0.061    4.493    4.864
 make_images                       4110 14.4    0.404    0.523    4.377    4.750
 density_rs2pw                      110  9.6    0.004    0.005    3.299    4.567
 ot_diis_step                        99 11.5    0.010    0.011    4.560    4.560
 qs_ot_p2m_diag                      48 11.0    0.042    0.044    4.178    4.187
 rs_pw_transfer                     902 11.9    0.010    0.011    2.914    4.186
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.325    3.969
 pw_transfer                       1331 11.6    0.066    0.074    3.861    3.872
 multiply_cannon_sync_h2d         16440 15.4    3.182    3.847    3.182    3.847
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.785    3.786
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.754    3.767
 grid_integrate_task_list           110 12.3    3.181    3.391    3.181    3.391
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.206    3.216
 cp_fm_diag_elpa_base                48 14.0    3.140    3.172    3.204    3.214
 fft_wrap_pw1pw2_140                451 13.1    0.642    0.649    3.199    3.213
 wfi_extrapolate                     11  7.9    0.001    0.001    3.114    3.114
 make_images_data                  4110 15.4    0.045    0.050    2.538    3.003
 hybrid_alltoall_any               4261 16.3    0.109    0.386    2.235    2.954
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.912    2.913
 cp_fm_cholesky_invert               11 10.9    2.892    2.899    2.892    2.899
 qs_ot_get_derivative_diag           47 12.0    0.001    0.002    2.606    2.672
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.537    2.596
 calculate_dm_sparse                110  9.5    0.001    0.001    2.563    2.591
 mp_waitany                       17072 13.8    1.263    2.584    1.263    2.584
 fft3d_ps                          1111 14.6    1.091    1.099    2.569    2.582
 multiply_cannon_metrocomm4       14385 15.4    0.047    0.052    0.935    2.572
 mp_sum_l                          6594 12.7    1.751    2.517    1.751    2.517
 potential_pw2rs                    110 12.3    0.011    0.011    2.479    2.496
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.494    2.496
 grid_collocate_task_list           110  9.6    2.124    2.485    2.124    2.485
 mp_irecv_dv                      48980 15.7    0.861    2.437    0.861    2.437
 mp_alltoall_d11v                  2046 13.8    1.968    2.287    1.968    2.287
 rs_pw_transfer_RS2PW_140           121 11.5    0.175    0.180    0.934    2.194
 qs_energies_init_hamiltonians       11  5.9    0.001    0.003    2.048    2.051
 dbcsr_complete_redistribute        325 12.2    0.326    0.360    1.522    1.966
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.769    1.788
 mp_allgather_i34                  2055 14.4    0.575    1.732    0.575    1.732
 cp_fm_upper_to_full                 70 14.2    1.368    1.721    1.368    1.721
 acc_transpose_blocks             16440 15.4    0.079    0.083    1.584    1.694
 cp_fm_cholesky_decompose            22 10.9    1.643    1.664    1.643    1.664
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.467    1.476
 copy_fm_to_dbcsr                   174 11.2    0.001    0.002    1.033    1.475
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.373    1.468
 qs_ot_get_orbitals                  99 10.5    0.000    0.001    1.366    1.371
 rs_gather_matrices                 110 12.3    0.232    0.259    1.007    1.305
 mp_alltoall_z22v                  1111 16.6    1.269    1.291    1.269    1.291
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=63.194000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=630.000000, yerr=9.095453
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             739.979264E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66419.
 MP_Allreduce         9774                    603.
 MP_Sync                52
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.025    0.047   67.383   67.384
 qs_mol_dyn_low                       1  2.0    0.003    0.003   67.048   67.062
 qs_forces                           11  3.9    0.003    0.004   66.974   66.974
 qs_energies                         11  4.9    0.002    0.008   63.496   63.501
 scf_env_do_scf                      11  5.9    0.001    0.001   55.045   55.048
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   43.184   43.185
 velocity_verlet                     10  3.0    0.001    0.002   38.187   38.190
 dbcsr_multiply_generic            2055 12.4    0.115    0.121   30.820   31.011
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.804   27.911
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   27.804   27.910
 ot_scf_mini                         99  9.5    0.003    0.003   26.117   26.217
 multiply_cannon                   2055 13.4    0.240    0.262   23.670   24.833
 multiply_cannon_loop              2055 14.4    1.420    1.480   22.097   22.701
 ot_mini                             99 10.5    0.001    0.001   14.924   15.053
 multiply_cannon_multrec          24660 15.4    4.073    6.739   12.989   14.131
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.396   12.491
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.016   12.396   12.491
 init_scf_loop                       11  6.9    0.002    0.010   11.820   11.821
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.924   11.008
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.758   10.863
 prepare_preconditioner              11  7.9    0.000    0.000   10.008   10.023
 make_preconditioner                 11  8.9    0.000    0.001   10.008   10.023
 dbcsr_mm_accdrv_process          52304 16.0    7.614    9.014    8.760    9.698
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.212    9.681
 sum_up_and_integrate               110 10.3    0.067    0.071    6.611    6.623
 integrate_v_rspace                 110 11.3    0.003    0.003    6.543    6.557
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.350    6.358
 calculate_rho_elec                 110  8.6    0.078    0.082    6.350    6.358
 mp_waitall_1                    121746 16.5    4.195    6.237    4.195    6.237
 qs_ot_get_p                        110 10.4    0.001    0.001    6.050    6.202
 make_m2s                          4110 13.4    0.059    0.061    5.469    5.936
 make_images                       4110 14.4    0.577    0.701    5.328    5.790
 init_scf_run                        11  5.9    0.000    0.001    5.651    5.652
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    5.651    5.652
 cp_fm_upper_to_full                 70 14.2    3.340    4.851    3.340    4.851
 qs_ot_p2m_diag                      48 11.0    0.054    0.063    4.110    4.125
 ot_diis_step                        99 11.5    0.011    0.011    4.121    4.121
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.046    4.117
 apply_single                       110 13.6    0.000    0.000    4.046    4.117
 dbcsr_complete_redistribute        325 12.2    0.422    0.465    2.737    3.878
 pw_transfer                       1331 11.6    0.066    0.076    3.819    3.850
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.712    3.747
 density_rs2pw                      110  9.6    0.004    0.004    3.192    3.715
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.560    3.611
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.537    3.537
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.399    3.459
 grid_integrate_task_list           110 12.3    3.279    3.444    3.279    3.444
 copy_fm_to_dbcsr                   174 11.2    0.001    0.002    2.156    3.282
 fft_wrap_pw1pw2_140                451 13.1    0.672    0.691    3.161    3.197
 hybrid_alltoall_any               4261 16.3    0.123    0.460    2.325    3.192
 make_images_data                  4110 15.4    0.048    0.051    2.685    3.163
 wfi_extrapolate                     11  7.9    0.001    0.001    3.096    3.096
 rs_pw_transfer                     902 11.9    0.010    0.011    2.562    3.087
 multiply_cannon_metrocomm3       24660 15.4    0.038    0.039    1.315    3.024
 calculate_dm_sparse                110  9.5    0.001    0.001    2.985    3.014
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.983    2.993
 cp_fm_diag_elpa_base                48 14.0    2.828    2.886    2.981    2.990
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.787    2.902
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.849    2.851
 mp_alltoall_i22                    605 13.7    1.661    2.824    1.661    2.824
 cp_fm_cholesky_invert               11 10.9    2.728    2.738    2.728    2.738
 multiply_cannon_sync_h2d         24660 15.4    2.380    2.560    2.380    2.560
 acc_transpose_blocks             24660 15.4    0.110    0.113    2.426    2.544
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.001    2.479    2.517
 fft3d_ps                          1111 14.6    1.081    1.109    2.469    2.486
 calculate_first_density_matrix       1  7.0    0.001    0.006    2.455    2.458
 grid_collocate_task_list           110  9.6    2.222    2.433    2.222    2.433
 qs_energies_init_hamiltonians       11  5.9    0.000    0.002    2.292    2.292
 potential_pw2rs                    110 12.3    0.012    0.013    2.231    2.242
 mp_alltoall_d11v                  2046 13.8    1.853    2.156    1.853    2.156
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.810    1.842
 cp_fm_cholesky_decompose            22 10.9    1.679    1.723    1.679    1.723
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.713    1.723
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.617    1.717
 mp_allgather_i34                  2055 14.4    0.659    1.630    0.659    1.630
 mp_sum_l                          6594 12.7    1.005    1.624    1.005    1.624
 mp_waitany                       13376 13.8    1.101    1.591    1.101    1.591
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.543    1.559
 acc_transpose_blocks_sync        73980 16.4    1.414    1.518    1.414    1.518
 multiply_cannon_metrocomm4       20550 15.4    0.061    0.064    0.850    1.489
 jit_kernel_multiply                  8 15.8    0.802    1.437    0.802    1.437
 mp_irecv_dv                      62702 16.1    0.746    1.408    0.746    1.408
 rs_pw_transfer_RS2PW_140           121 11.5    0.168    0.177    0.859    1.396
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=67.384000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=698.909091, yerr=10.022289
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             860.901376E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66417.
 MP_Allreduce         9774                    644.
 MP_Sync                52
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.031   57.890   57.892
 qs_mol_dyn_low                       1  2.0    0.003    0.003   57.634   57.645
 qs_forces                           11  3.9    0.003    0.003   57.560   57.561
 qs_energies                         11  4.9    0.001    0.001   53.776   53.781
 scf_env_do_scf                      11  5.9    0.000    0.001   45.350   45.350
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   37.107   37.109
 velocity_verlet                     10  3.0    0.004    0.004   32.986   32.989
 dbcsr_multiply_generic            2055 12.4    0.105    0.109   23.702   23.852
 qs_scf_new_mos                      99  7.5    0.001    0.001   21.544   21.605
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   21.544   21.605
 ot_scf_mini                         99  9.5    0.002    0.002   20.266   20.294
 multiply_cannon                   2055 13.4    0.237    0.255   17.831   19.354
 multiply_cannon_loop              2055 14.4    0.604    0.627   16.510   16.886
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.263   12.299
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.013   12.262   12.299
 ot_mini                             99 10.5    0.001    0.001   11.090   11.110
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.872   10.906
 multiply_cannon_multrec           8220 15.4    3.198    4.303    7.592    8.482
 init_scf_loop                       11  6.9    0.000    0.000    8.192    8.193
 mp_waitall_1                    103326 16.6    6.186    7.855    6.186    7.855
 qs_ot_get_derivative                99 11.5    0.001    0.001    7.352    7.380
 sum_up_and_integrate               110 10.3    0.080    0.082    6.765    6.776
 integrate_v_rspace                 110 11.3    0.003    0.003    6.684    6.695
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.611    6.625
 calculate_rho_elec                 110  8.6    0.114    0.115    6.611    6.625
 prepare_preconditioner              11  7.9    0.000    0.000    6.447    6.455
 make_preconditioner                 11  8.9    0.000    0.000    6.447    6.455
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.001    6.080
 init_scf_run                        11  5.9    0.000    0.001    5.223    5.225
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    5.223    5.225
 dbcsr_mm_accdrv_process          17442 15.9    3.022    4.156    4.256    5.187
 qs_ot_get_p                        110 10.4    0.001    0.001    5.155    5.168
 make_m2s                          4110 13.4    0.038    0.039    4.412    4.706
 make_images                       4110 14.4    0.649    0.712    4.283    4.579
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.019    2.917    4.456
 pw_transfer                       1331 11.6    0.066    0.074    4.170    4.190
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.009    4.062    4.083
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.742    3.775
 apply_single                       110 13.6    0.000    0.000    3.742    3.775
 ot_diis_step                        99 11.5    0.012    0.012    3.712    3.713
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.703    3.706
 density_rs2pw                      110  9.6    0.004    0.005    3.247    3.617
 grid_integrate_task_list           110 12.3    3.374    3.519    3.374    3.519
 fft_wrap_pw1pw2_140                451 13.1    0.840    0.851    3.469    3.503
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.351    3.351
 cp_fm_cholesky_invert               11 10.9    3.133    3.139    3.133    3.139
 wfi_extrapolate                     11  7.9    0.001    0.001    2.856    2.858
 hybrid_alltoall_any               4261 16.3    0.200    0.846    2.378    2.847
 rs_pw_transfer                     902 11.9    0.010    0.010    2.417    2.810
 make_images_data                  4110 15.4    0.040    0.046    2.381    2.767
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.757    2.764
 cp_fm_diag_elpa_base                48 14.0    2.699    2.725    2.755    2.763
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    2.716    2.717
 fft3d_ps                          1111 14.6    1.135    1.170    2.598    2.609
 grid_collocate_task_list           110  9.6    2.314    2.570    2.314    2.570
 calculate_dm_sparse                110  9.5    0.001    0.001    2.517    2.556
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.522    2.524
 multiply_cannon_sync_h2d          8220 15.4    2.378    2.470    2.378    2.470
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.266    2.268
 potential_pw2rs                    110 12.3    0.015    0.015    2.260    2.265
 mp_alltoall_d11v                  2046 13.8    1.871    2.196    1.871    2.196
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.135    2.164
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.133    2.157
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.013    2.024
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.794    2.015
 cp_fm_cholesky_decompose            22 10.9    1.824    1.834    1.824    1.834
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.724    1.727
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    1.535    1.661
 mp_allgather_i34                  2055 14.4    0.489    1.646    0.489    1.646
 dbcsr_complete_redistribute        325 12.2    0.562    0.576    1.507    1.612
 mp_waitany                        9240 13.8    1.156    1.575    1.156    1.575
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.456    1.468
 multiply_cannon_metrocomm1        8220 15.4    0.022    0.023    0.982    1.398
 acc_transpose_blocks              8220 15.4    0.039    0.040    1.351    1.389
 jit_kernel_multiply                  8 15.6    0.924    1.371    0.924    1.371
 qs_create_task_list                 11  7.9    0.001    0.003    1.233    1.342
 generate_qs_task_list               11  8.9    0.375    0.445    1.233    1.342
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.252    1.269
 mp_alltoall_z22v                  1111 16.6    1.230    1.249    1.230    1.249
 rs_pw_transfer_RS2PW_140           121 11.5    0.162    0.166    0.839    1.245
 rs_gather_matrices                 110 12.3    0.324    0.367    0.956    1.208
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=57.892000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=813.636364, yerr=12.249473
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.405329E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67098.
 MP_Allreduce         9752                    812.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.019    0.038   89.882   89.884
 qs_mol_dyn_low                       1  2.0    0.003    0.003   89.545   89.559
 qs_forces                           11  3.9    0.003    0.003   89.469   89.470
 qs_energies                         11  4.9    0.001    0.001   85.252   85.254
 scf_env_do_scf                      11  5.9    0.000    0.001   75.094   75.094
 velocity_verlet                     10  3.0    0.002    0.002   57.212   57.219
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   46.503   46.505
 dbcsr_multiply_generic            2055 12.4    0.119    0.125   30.120   30.200
 init_scf_loop                       11  6.9    0.000    0.000   28.516   28.518
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.844   27.878
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   27.844   27.878
 prepare_preconditioner              11  7.9    0.000    0.000   26.406   26.413
 make_preconditioner                 11  8.9    0.000    0.000   26.406   26.413
 ot_scf_mini                         99  9.5    0.002    0.002   26.005   26.035
 make_full_inverse_cholesky          11  9.9    0.000    0.000   20.777   25.871
 multiply_cannon                   2055 13.4    0.330    0.357   22.615   23.333
 multiply_cannon_loop              2055 14.4    0.829    0.840   20.786   21.152
 cp_fm_upper_to_full                 70 14.2   12.589   17.922   12.589   17.922
 ot_mini                             99 10.5    0.001    0.001   14.529   14.555
 rebuild_ks_matrix                  110  8.3    0.000    0.001   14.488   14.539
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.013   14.488   14.539
 qs_ks_update_qs_env                110  7.6    0.001    0.001   13.096   13.143
 dbcsr_complete_redistribute        325 12.2    1.024    1.070    7.246   10.392
 multiply_cannon_multrec           8220 15.4    4.068    4.221    9.755    9.898
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.814    9.843
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.192    9.328
 mp_waitall_1                     84994 16.7    8.065    8.906    8.065    8.906
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    5.613    8.714
 mp_alltoall_i22                    605 13.7    5.227    8.376    5.227    8.376
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.893    7.928
 calculate_rho_elec                 110  8.6    0.225    0.225    7.892    7.928
 sum_up_and_integrate               110 10.3    0.151    0.151    7.527    7.548
 integrate_v_rspace                 110 11.3    0.004    0.004    7.376    7.397
 make_m2s                          4110 13.4    0.043    0.044    5.702    6.223
 qs_ot_get_p                        110 10.4    0.001    0.001    6.066    6.094
 make_images                       4110 14.4    0.882    0.924    5.514    6.034
 init_scf_run                        11  5.9    0.000    0.001    5.857    5.857
 scf_env_initial_rho_setup           11  6.9    0.000    0.001    5.856    5.856
 dbcsr_mm_accdrv_process          11614 15.7    3.837    4.133    5.539    5.741
 cp_fm_cholesky_invert               11 10.9    5.558    5.562    5.558    5.562
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.020    4.990    5.372
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.863    5.297
 apply_single                       110 13.6    0.000    0.000    4.863    5.296
 pw_transfer                       1331 11.6    0.076    0.076    5.187    5.195
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.009    5.069    5.077
 ot_diis_step                        99 11.5    0.015    0.015    4.691    4.691
 fft_wrap_pw1pw2_140                451 13.1    1.341    1.347    4.400    4.412
 qs_ot_p2m_diag                      48 11.0    0.150    0.155    4.372    4.379
 density_rs2pw                      110  9.6    0.004    0.005    3.856    3.897
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.849    3.849
 hybrid_alltoall_any               4261 16.3    0.263    0.566    3.071    3.844
 make_images_data                  4110 15.4    0.044    0.048    3.105    3.779
 grid_integrate_task_list           110 12.3    3.697    3.761    3.697    3.761
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.677    3.678
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.121    3.578
 wfi_extrapolate                     11  7.9    0.001    0.001    3.458    3.459
 calculate_dm_sparse                110  9.5    0.001    0.001    3.216    3.240
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.227    3.227
 cp_fm_diag_elpa_base                48 14.0    2.688    2.883    3.225    3.225
 multiply_cannon_sync_h2d          8220 15.4    3.122    3.145    3.122    3.145
 fft3d_ps                          1111 14.6    1.301    1.306    2.994    3.005
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.922    2.925
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.740    2.757
 grid_collocate_task_list           110  9.6    2.684    2.701    2.684    2.701
 potential_pw2rs                    110 12.3    0.021    0.021    2.567    2.579
 rs_pw_transfer                     902 11.9    0.010    0.010    2.445    2.466
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.410    2.426
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.273    2.274
 qs_env_update_s_mstruct             11  6.9    0.000    0.000    2.180    2.248
 mp_alltoall_d11v                  2046 13.8    2.065    2.165    2.065    2.165
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.085    2.160
 cp_fm_cholesky_decompose            22 10.9    2.107    2.125    2.107    2.125
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.992    1.997
 qs_create_task_list                 11  7.9    0.000    0.001    1.887    1.936
 generate_qs_task_list               11  8.9    0.730    0.785    1.887    1.935
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.803    1.832
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=89.884000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1267.818182, yerr=59.618207
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             630.517760E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175955383376
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4002                  57761.
 MP_Allreduce        11084                    796.
 MP_Sync                87
 MP_Alltoall          2226                 809702.
 MP_SendRecv         24320                  18752.
 MP_ISendRecv        24320                  18752.
 MP_Wait             42476
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.034  209.490  209.491
 qs_mol_dyn_low                       1  2.0    0.004    0.004  209.041  209.057
 qs_forces                           11  3.9    0.005    0.005  208.950  208.955
 qs_energies                         11  4.9    0.001    0.001  203.287  203.309
 scf_env_do_scf                      11  5.9    0.001    0.001  186.677  186.681
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.009  166.146  166.148
 dbcsr_multiply_generic            2507 12.6    0.180    0.184  126.444  127.070
 qs_scf_new_mos                     117  7.6    0.001    0.001  125.618  125.850
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  125.617  125.850
 velocity_verlet                     10  3.0    0.001    0.002  125.353  125.355
 ot_scf_mini                        117  9.6    0.003    0.003  118.848  119.066
 multiply_cannon                   2507 13.6    0.239    0.250  101.983  103.872
 multiply_cannon_loop              2507 14.6    2.443    2.494   99.756  101.655
 ot_mini                            117 10.6    0.001    0.001   66.777   67.002
 multiply_cannon_multrec          60168 15.6   31.720   33.672   41.494   43.615
 qs_ot_get_derivative               117 11.6    0.001    0.001   41.939   42.151
 rebuild_ks_matrix                  128  8.3    0.001    0.001   34.015   34.592
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.017   34.015   34.592
 mp_waitall_1                    267128 16.5   29.357   32.299   29.357   32.299
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.522   31.089
 qs_ot_get_p                        128 10.4    0.001    0.001   29.758   30.127
 multiply_cannon_sync_h2d         60168 15.6   26.229   28.043   26.229   28.043
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.349   25.086
 apply_single                       128 13.6    0.001    0.001   24.348   25.086
 ot_diis_step                       117 11.6    0.008    0.008   24.592   24.593
 qs_ot_p2m_diag                      83 11.4    0.079    0.092   23.101   23.153
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   20.361   20.546
 init_scf_loop                       11  6.9    0.000    0.001   20.457   20.459
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   20.302   20.303
 multiply_cannon_metrocomm3       60168 15.6    0.121    0.125   16.213   18.413
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   17.327   17.357
 cp_fm_diag_elpa_base                83 14.4   17.255   17.303   17.323   17.355
 prepare_preconditioner              11  7.9    0.000    0.000   15.875   15.936
 make_preconditioner                 11  8.9    0.000    0.000   15.875   15.936
 make_full_inverse_cholesky          11  9.9    0.000    0.000   15.122   15.323
 sum_up_and_integrate               128 10.3    0.089    0.107   14.361   14.375
 make_m2s                          5014 13.6    0.103    0.111   13.902   14.297
 integrate_v_rspace                 128 11.3    0.004    0.004   14.272   14.287
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.943   14.132
 calculate_rho_elec                 128  8.7    0.046    0.065   13.942   14.132
 make_images                       5014 14.6    0.398    0.417   13.721   14.120
 init_scf_run                        11  5.9    0.000    0.001   12.428   12.428
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   12.428   12.428
 density_rs2pw                      128  9.7    0.006    0.007    7.137   10.615
 mp_sum_l                          7950 12.9    9.293   10.518    9.293   10.518
 dbcsr_mm_accdrv_process         124484 16.2    4.747    4.899    9.343    9.894
 rs_pw_transfer                    1046 11.9    0.016    0.018    5.837    9.310
 cp_fm_cholesky_invert               11 10.9    9.122    9.131    9.122    9.131
 wfi_extrapolate                     11  7.9    0.001    0.001    9.104    9.104
 calculate_dm_sparse                128  9.5    0.001    0.001    8.633    8.749
 multiply_cannon_metrocomm1       60168 15.6    0.097    0.100    6.571    8.690
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    8.203    8.333
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    8.131    8.202
 pw_transfer                       1547 11.6    0.074    0.092    7.835    8.028
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.013    7.632    7.814
 make_images_data                  5014 15.6    0.068    0.075    6.837    7.702
 grid_integrate_task_list           128 12.3    7.032    7.537    7.032    7.537
 hybrid_alltoall_any               5200 16.5    0.295    2.272    5.962    7.284
 fft_wrap_pw1pw2_140                523 13.2    1.274    1.313    6.659    6.825
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.003    6.643    6.655
 mp_waitany                       16020 13.9    2.730    6.298    2.730    6.298
 grid_collocate_task_list           128  9.7    4.681    5.969    4.681    5.969
 mp_alltoall_d11v                  2415 14.1    4.391    5.941    4.391    5.941
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.766    5.897
 fft3d_ps                          1291 14.7    2.141    2.798    5.397    5.737
 rs_pw_transfer_RS2PW_140           139 11.5    0.277    0.291    2.139    5.634
 potential_pw2rs                    128 12.3    0.009    0.010    4.681    4.705
 cp_fm_cholesky_decompose            22 10.9    4.591    4.606    4.591    4.606
 mp_sum_d                          4470 12.1    3.403    4.239    3.403    4.239
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=209.491000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=596.909091, yerr=6.402479
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.183246E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               5975232       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.7
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             840.126464E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2406720
 MPI messages size (bytes):
  total size                         4.100942E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.703955E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               70860               2317615104
     32768 < size <=   131072              722992              55511613440
    131072 < size <=  4194304             1375664            1398181724160
   4194304 < size <= 16777216              154704            1463834332048
  16777216 < size                           67584            1181116006400
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3994                  58329.
 MP_Allreduce        11063                    960.
 MP_Sync                87
 MP_Alltoall          1969                5618844.
 MP_SendRecv         12032                  47072.
 MP_ISendRecv        12032                  47072.
 MP_Wait             25916
 MP_ISend            11748                 212467.
 MP_IRecv            11748                 212467.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.032  193.084  193.085
 qs_mol_dyn_low                       1  2.0    0.003    0.004  192.725  192.739
 qs_forces                           11  3.9    0.004    0.005  191.824  191.825
 qs_energies                         11  4.9    0.001    0.001  184.931  184.942
 scf_env_do_scf                      11  5.9    0.001    0.001  168.480  168.490
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  134.963  134.966
 velocity_verlet                     10  3.0    0.001    0.002  122.030  122.048
 dbcsr_multiply_generic            2507 12.6    0.190    0.196   97.690   98.938
 qs_scf_new_mos                     117  7.6    0.001    0.001   94.903   95.509
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   94.902   95.508
 ot_scf_mini                        117  9.6    0.004    0.004   90.053   90.673
 multiply_cannon                   2507 13.6    0.504    0.559   77.351   81.786
 multiply_cannon_loop              2507 14.6    1.571    1.640   73.955   76.795
 ot_mini                            117 10.6    0.001    0.001   50.594   51.180
 mp_waitall_1                    214728 16.6   24.606   39.614   24.606   39.614
 multiply_cannon_multrec          30084 15.6   20.806   25.741   31.335   36.553
 rebuild_ks_matrix                  128  8.3    0.001    0.001   33.207   33.946
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.019   33.207   33.945
 init_scf_loop                       11  6.9    0.000    0.000   33.425   33.426
 qs_ks_update_qs_env                128  7.6    0.001    0.001   29.818   30.489
 multiply_cannon_metrocomm3       30084 15.6    0.098    0.104   16.111   29.421
 prepare_preconditioner              11  7.9    0.000    0.000   28.980   29.062
 make_preconditioner                 11  8.9    0.000    0.000   28.980   29.062
 qs_ot_get_derivative               117 11.6    0.001    0.002   28.329   28.940
 make_full_inverse_cholesky          11  9.9    0.000    0.000   27.659   28.200
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   22.359   23.496
 apply_single                       128 13.6    0.001    0.001   22.358   23.495
 qs_ot_get_p                        128 10.4    0.001    0.001   21.461   22.213
 ot_diis_step                       117 11.6    0.014    0.015   22.084   22.087
 multiply_cannon_sync_h2d         30084 15.6   17.750   20.181   17.750   20.181
 cp_fm_cholesky_invert               11 10.9   17.107   17.120   17.107   17.120
 qs_ot_p2m_diag                      83 11.4    0.188    0.216   16.537   16.573
 make_m2s                          5014 13.6    0.088    0.091   14.336   15.996
 make_images                       5014 14.6    1.170    1.380   14.130   15.790
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   15.308   15.309
 sum_up_and_integrate               128 10.3    0.116    0.134   14.830   14.875
 integrate_v_rspace                 128 11.3    0.004    0.004   14.713   14.762
 qs_rho_update_rho_low              128  7.7    0.001    0.001   14.290   14.323
 calculate_rho_elec                 128  8.7    0.088    0.104   14.290   14.322
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   12.108   12.145
 cp_fm_diag_elpa_base                83 14.4   11.854   11.957   12.103   12.135
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   11.246   11.660
 init_scf_run                        11  5.9    0.000    0.001   11.595   11.596
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   11.594   11.596
 multiply_cannon_metrocomm4       27577 15.6    0.107    0.123    3.952   11.135
 mp_irecv_dv                      69486 16.3    3.743   10.722    3.743   10.722
 dbcsr_mm_accdrv_process          62242 16.2    5.491    6.291    9.988   10.535
 make_images_data                  5014 15.6    0.068    0.076    8.446   10.364
 density_rs2pw                      128  9.7    0.006    0.007    7.578   10.109
 hybrid_alltoall_any               5200 16.5    0.351    1.528    7.265    9.621
 pw_transfer                       1547 11.6    0.086    0.098    8.913    8.977
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.011    8.688    8.752
 wfi_extrapolate                     11  7.9    0.001    0.001    8.414    8.414
 rs_pw_transfer                    1046 11.9    0.014    0.017    5.857    8.403
 fft_wrap_pw1pw2_140                523 13.2    1.340    1.356    7.666    7.751
 grid_integrate_task_list           128 12.3    7.185    7.539    7.185    7.539
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    6.286    7.074
 cp_fm_cholesky_decompose            22 10.9    6.988    7.073    6.988    7.073
 calculate_dm_sparse                128  9.5    0.001    0.001    6.511    6.639
 mp_sum_l                          7950 12.9    4.204    6.392    4.204    6.392
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.119    6.127
 fft3d_ps                          1291 14.7    2.814    2.988    5.986    6.045
 grid_collocate_task_list           128  9.7    4.782    5.975    4.782    5.975
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    5.427    5.505
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.204    5.402
 mp_allgather_i34                  2507 14.6    1.927    5.146    1.927    5.146
 mp_waitany                       11748 13.9    2.526    5.126    2.526    5.126
 potential_pw2rs                    128 12.3    0.015    0.017    5.068    5.097
 mp_alltoall_d11v                  2415 14.1    4.143    4.757    4.143    4.757
 rs_pw_transfer_RS2PW_140           139 11.5    0.351    0.373    2.099    4.629
 mp_sum_d                          4466 12.1    2.690    4.146    2.690    4.146
 dbcsr_complete_redistribute        395 12.7    0.782    0.861    3.153    4.007
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=193.085000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=800.818182, yerr=2.166614
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.928533E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               3984192       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1695.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             950.394880E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1042912
 MPI messages size (bytes):
  total size                         2.716210E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.604448E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              281856              36943429632
    131072 < size <=  4194304              660064             996105256960
   4194304 < size <= 16777216               65632             931530938576
  16777216 < size                           28672             751619276800
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  58351.
 MP_Allreduce        11057                   1000.
 MP_Sync                87
 MP_Alltoall          1712                9388896.
 MP_SendRecv          7936                  75008.
 MP_ISendRecv         7936                  75008.
 MP_Wait             21820
 MP_ISend            11748                 275205.
 MP_IRecv            11748                 275205.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.032  177.379  177.380
 qs_mol_dyn_low                       1  2.0    0.003    0.004  176.947  176.963
 qs_forces                           11  3.9    0.004    0.005  176.844  176.846
 qs_energies                         11  4.9    0.001    0.001  170.140  170.150
 scf_env_do_scf                      11  5.9    0.001    0.001  154.528  154.529
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.009  119.212  119.212
 velocity_verlet                     10  3.0    0.002    0.002  113.958  113.961
 dbcsr_multiply_generic            2507 12.6    0.185    0.200   82.067   83.143
 qs_scf_new_mos                     117  7.6    0.001    0.001   80.617   80.969
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   80.616   80.968
 ot_scf_mini                        117  9.6    0.003    0.004   76.400   76.780
 multiply_cannon                   2507 13.6    0.511    0.535   62.466   66.585
 multiply_cannon_loop              2507 14.6    1.130    1.197   59.292   61.963
 ot_mini                            117 10.6    0.001    0.001   42.737   43.133
 init_scf_loop                       11  6.9    0.000    0.000   35.212   35.213
 mp_waitall_1                    170520 16.6   24.788   34.304   24.788   34.304
 rebuild_ks_matrix                  128  8.3    0.001    0.001   30.987   31.466
 qs_ks_build_kohn_sham_matrix       128  9.3    0.016    0.017   30.986   31.466
 prepare_preconditioner              11  7.9    0.000    0.000   31.138   31.199
 make_preconditioner                 11  8.9    0.000    0.000   31.138   31.199
 make_full_inverse_cholesky          11  9.9    0.000    0.000   28.750   30.169
 qs_ks_update_qs_env                128  7.6    0.001    0.001   27.864   28.302
 multiply_cannon_multrec          20056 15.6   12.985   15.748   22.257   25.051
 multiply_cannon_metrocomm3       20056 15.6    0.063    0.067   15.629   24.903
 qs_ot_get_derivative               117 11.6    0.001    0.002   22.884   23.265
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   19.995   21.109
 apply_single                       128 13.6    0.001    0.001   19.994   21.108
 ot_diis_step                       117 11.6    0.018    0.018   19.745   19.745
 qs_ot_get_p                        128 10.4    0.001    0.001   19.013   19.520
 make_m2s                          5014 13.6    0.079    0.084   14.551   15.901
 make_images                       5014 14.6    1.145    1.242   14.320   15.670
 multiply_cannon_sync_h2d         20056 15.6   13.523   15.170   13.523   15.170
 qs_ot_p2m_diag                      83 11.4    0.265    0.272   14.794   14.806
 sum_up_and_integrate               128 10.3    0.132    0.144   14.738   14.763
 integrate_v_rspace                 128 11.3    0.004    0.005   14.606   14.637
 cp_fm_cholesky_invert               11 10.9   14.621   14.631   14.621   14.631
 qs_rho_update_rho_low              128  7.7    0.001    0.001   14.581   14.613
 calculate_rho_elec                 128  8.7    0.130    0.145   14.581   14.613
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   13.694   13.703
 init_scf_run                        11  5.9    0.000    0.001   10.599   10.600
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   10.599   10.599
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   10.479   10.502
 cp_fm_diag_elpa_base                83 14.4   10.069   10.236   10.476   10.499
 make_images_data                  5014 15.6    0.063    0.071    8.623   10.418
 hybrid_alltoall_any               5200 16.5    0.450    2.050    7.490    9.771
 multiply_cannon_metrocomm4       17549 15.6    0.065    0.076    3.552    9.566
 density_rs2pw                      128  9.7    0.006    0.006    7.231    9.372
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.051    9.339
 mp_irecv_dv                      50230 16.2    3.424    9.303    3.424    9.303
 pw_transfer                       1547 11.6    0.086    0.104    8.930    9.034
 dbcsr_mm_accdrv_process          41502 16.2    5.608    5.932    8.735    8.888
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    8.706    8.815
 fft_wrap_pw1pw2_140                523 13.2    1.420    1.443    7.707    7.831
 grid_integrate_task_list           128 12.3    7.279    7.724    7.279    7.724
 wfi_extrapolate                     11  7.9    0.001    0.001    7.495    7.495
 cp_fm_cholesky_decompose            22 10.9    7.437    7.466    7.437    7.466
 rs_pw_transfer                    1046 11.9    0.013    0.015    5.239    7.381
 cp_fm_upper_to_full                105 14.8    5.627    7.181    5.627    7.181
 dbcsr_complete_redistribute        395 12.7    1.172    1.208    4.645    6.362
 calculate_dm_sparse                128  9.5    0.001    0.001    5.827    5.926
 grid_collocate_task_list           128  9.7    4.985    5.909    4.985    5.909
 fft3d_ps                          1291 14.7    2.742    2.969    5.817    5.898
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.506    5.513
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.545    5.267
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.456    5.173
 mp_alltoall_d11v                  2415 14.1    4.484    5.046    4.484    5.046
 potential_pw2rs                    128 12.3    0.020    0.022    4.802    4.821
 mp_allgather_i34                  2507 14.6    1.679    4.770    1.679    4.770
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.623    4.756
 mp_sum_l                          7950 12.9    3.208    4.643    3.208    4.643
 mp_waitany                       11748 13.9    2.373    4.546    2.373    4.546
 transfer_fm_to_dbcsr                11  9.9    0.017    0.020    2.369    4.074
 rs_pw_transfer_RS2PW_140           139 11.5    0.329    0.352    1.921    4.071
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    3.952    3.983
 mp_alltoall_i22                    716 14.1    1.971    3.798    1.971    3.798
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.762    3.763
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.621    3.663
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=177.380000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=900.818182, yerr=10.070824
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022950912       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963542011904       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444706349056       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019182452736       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019182452736       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796564E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.320337E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499488       0.0%      0.0%    100.0%
 number of processed stacks               5927808       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1131.2
 marketing flops                   143.508480E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               1.189376E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1133160
 MPI messages size (bytes):
  total size                         2.008142E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.772161E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              315952              35695099904
    131072 < size <=  4194304              709496             778939400192
   4194304 < size <= 16777216               69840             660837542000
  16777216 < size                           30480             532676608000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4003                  58189.
 MP_Allreduce        11085                   1083.
 MP_Sync                86
 MP_Alltoall          1700               12496381.
 MP_SendRecv          5842                  75008.
 MP_ISendRecv         5842                  75008.
 MP_Wait             22272
 MP_ISend            14840                 244848.
 MP_IRecv            14840                 244848.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.032  189.474  189.474
 qs_mol_dyn_low                       1  2.0    0.003    0.003  189.104  189.119
 qs_forces                           11  3.9    0.004    0.005  188.991  188.998
 qs_energies                         11  4.9    0.001    0.001  181.688  181.698
 scf_env_do_scf                      11  5.9    0.001    0.001  164.623  164.630
 velocity_verlet                     10  3.0    0.002    0.002  125.103  125.105
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  118.032  118.033
 qs_scf_new_mos                     116  7.6    0.001    0.001   80.455   80.813
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   80.455   80.813
 dbcsr_multiply_generic            2485 12.5    0.185    0.192   80.037   80.777
 ot_scf_mini                        116  9.6    0.003    0.003   75.905   76.240
 multiply_cannon                   2485 13.5    0.547    0.583   55.231   57.782
 multiply_cannon_loop              2485 14.5    1.824    1.885   51.578   53.501
 init_scf_loop                       11  6.9    0.000    0.000   46.469   46.470
 ot_mini                            116 10.6    0.001    0.001   42.624   42.943
 prepare_preconditioner              11  7.9    0.000    0.000   42.363   42.383
 make_preconditioner                 11  8.9    0.000    0.000   42.363   42.383
 make_full_inverse_cholesky          11  9.9    0.011    0.023   35.981   41.010
 multiply_cannon_multrec          29820 15.5   13.543   18.548   26.343   31.048
 rebuild_ks_matrix                  127  8.3    0.001    0.001   29.992   30.333
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.019   29.992   30.333
 mp_waitall_1                    146592 16.7   17.323   27.886   17.323   27.886
 qs_ks_update_qs_env                127  7.6    0.001    0.001   26.949   27.241
 qs_ot_get_derivative               116 11.6    0.001    0.002   23.199   23.534
 make_m2s                          4970 13.5    0.094    0.098   20.428   21.521
 make_images                       4970 14.5    1.915    2.250   20.124   21.215
 qs_ot_get_p                        127 10.4    0.001    0.001   19.497   19.867
 apply_preconditioner_dbcsr         127 12.6    0.000    0.001   18.824   19.408
 apply_single                       127 13.6    0.001    0.001   18.824   19.408
 ot_diis_step                       116 11.6    0.017    0.018   19.298   19.301
 cp_fm_upper_to_full                104 14.8   11.362   16.791   11.362   16.791
 cp_fm_cholesky_invert               11 10.9   16.257   16.266   16.257   16.266
 multiply_cannon_metrocomm3       29820 15.5    0.050    0.053    6.239   15.481
 qs_ot_p2m_diag                      82 11.4    0.338    0.385   15.312   15.365
 sum_up_and_integrate               127 10.3    0.139    0.149   14.723   14.751
 qs_rho_update_rho_low              127  7.7    0.001    0.001   14.676   14.723
 calculate_rho_elec                 127  8.7    0.172    0.188   14.675   14.722
 integrate_v_rspace                 127 11.3    0.004    0.004   14.584   14.615
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   13.905   13.906
 dbcsr_complete_redistribute        393 12.7    1.597    1.737    9.239   12.967
 make_images_data                  4970 15.5    0.065    0.069   11.102   12.902
 dbcsr_mm_accdrv_process          61748 16.2    8.398    9.133   12.369   12.842
 hybrid_alltoall_any               5155 16.4    0.526    2.194    9.685   12.234
 copy_fm_to_dbcsr                   208 11.6    0.001    0.002    7.835   11.562
 init_scf_run                        11  5.9    0.000    0.001   11.111   11.113
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   11.111   11.112
 multiply_cannon_sync_h2d         29820 15.5   10.247   11.085   10.247   11.085
 cp_fm_diag_elpa                     82 13.4    0.000    0.001   10.717   10.732
 cp_fm_diag_elpa_base                82 14.4    9.754   10.072   10.710   10.724
 transfer_fm_to_dbcsr                11  9.9    0.001    0.003    6.360    9.974
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    9.711    9.954
 mp_alltoall_i22                    712 14.1    5.616    9.336    5.616    9.336
 pw_transfer                       1535 11.6    0.086    0.101    9.193    9.271
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    8.969    9.054
 density_rs2pw                      127  9.7    0.006    0.006    7.056    8.428
 fft_wrap_pw1pw2_140                519 13.2    1.554    1.580    7.967    8.061
 grid_integrate_task_list           127 12.3    7.389    7.895    7.389    7.895
 wfi_extrapolate                     11  7.9    0.001    0.001    7.858    7.858
 cp_fm_cholesky_decompose            22 10.9    7.630    7.722    7.630    7.722
 multiply_cannon_metrocomm4       24850 15.5    0.082    0.094    2.785    6.910
 mp_irecv_dv                      75445 16.2    2.632    6.627    2.632    6.627
 calculate_dm_sparse                127  9.5    0.001    0.001    6.236    6.331
 rs_pw_transfer                    1038 11.9    0.013    0.014    4.754    6.197
 fft3d_ps                          1281 14.7    2.817    2.879    5.908    5.967
 mp_alltoall_d11v                  2401 14.1    5.036    5.936    5.036    5.936
 grid_collocate_task_list           127  9.7    5.121    5.788    5.121    5.788
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.595    5.647
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.504    4.614
 potential_pw2rs                    127 12.3    0.023    0.024    4.583    4.597
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.396    4.493
 qs_energies_init_hamiltonians       11  5.9    0.029    0.030    4.489    4.490
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    4.203    4.274
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=189.474000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1103.727273, yerr=28.390430
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.865089E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks               1960712       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3445.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               1.563156E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  240672
 MPI messages size (bytes):
  total size                         1.331455E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.532238E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              113904              59718500352
   4194304 < size <= 16777216              104976             550376570880
  16777216 < size                           20208             721350232272
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8931                     51.
 MP_Alltoall          9654                 799394.
 MP_ISend            40068                2102573.
 MP_IRecv            40068                2101676.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4002                  58203.
 MP_Allreduce        11082                   1166.
 MP_Sync                87
 MP_Alltoall          1712               18838222.
 MP_SendRecv          3840                 122880.
 MP_ISendRecv         3840                 122880.
 MP_Wait             16122
 MP_ISend            10680                 423556.
 MP_IRecv            10680                 423556.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.026    0.058  173.852  173.867
 qs_mol_dyn_low                       1  2.0    0.003    0.003  173.439  173.463
 qs_forces                           11  3.9    0.004    0.005  173.327  173.330
 qs_energies                         11  4.9    0.001    0.001  165.622  165.630
 scf_env_do_scf                      11  5.9    0.001    0.001  148.076  148.082
 velocity_verlet                     10  3.0    0.002    0.002  114.367  114.371
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.009  111.515  111.516
 dbcsr_multiply_generic            2507 12.6    0.180    0.192   72.466   72.985
 qs_scf_new_mos                     117  7.6    0.001    0.001   72.674   72.767
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   72.673   72.766
 ot_scf_mini                        117  9.6    0.003    0.004   68.245   68.329
 multiply_cannon                   2507 13.6    0.560    0.577   53.839   57.034
 multiply_cannon_loop              2507 14.6    0.812    0.852   50.672   51.700
 ot_mini                            117 10.6    0.001    0.001   37.995   38.092
 init_scf_loop                       11  6.9    0.000    0.000   36.414   36.415
 prepare_preconditioner              11  7.9    0.000    0.000   32.475   32.506
 make_preconditioner                 11  8.9    0.000    0.000   32.474   32.506
 mp_waitall_1                    125778 16.7   25.074   31.595   25.074   31.595
 make_full_inverse_cholesky          11  9.9    0.016    0.029   30.352   30.627
 rebuild_ks_matrix                  128  8.3    0.001    0.001   30.076   30.221
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.019   30.075   30.221
 qs_ks_update_qs_env                128  7.6    0.001    0.001   27.283   27.417
 multiply_cannon_multrec          10028 15.6   10.340   15.918   17.909   21.922
 qs_ot_get_derivative               117 11.6    0.001    0.002   20.657   20.739
 multiply_cannon_metrocomm3       10028 15.6    0.025    0.026   13.120   20.320
 cp_fm_cholesky_invert               11 10.9   18.685   18.690   18.685   18.690
 qs_ot_get_p                        128 10.4    0.001    0.001   17.496   17.643
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   17.371   17.583
 apply_single                       128 13.6    0.001    0.001   17.371   17.583
 ot_diis_step                       117 11.6    0.020    0.020   17.265   17.266
 make_m2s                          5014 13.6    0.065    0.069   14.781   15.747
 qs_rho_update_rho_low              128  7.7    0.001    0.001   15.537   15.564
 calculate_rho_elec                 128  8.7    0.256    0.267   15.536   15.563
 make_images                       5014 14.6    2.169    2.605   14.477   15.438
 sum_up_and_integrate               128 10.3    0.182    0.191   15.132   15.177
 integrate_v_rspace                 128 11.3    0.004    0.004   14.950   15.004
 qs_ot_p2m_diag                      83 11.4    0.495    0.501   13.717   13.733
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   12.518   12.518
 multiply_cannon_sync_h2d         10028 15.6   10.754   11.114   10.754   11.114
 init_scf_run                        11  5.9    0.000    0.001   10.693   10.693
 scf_env_initial_rho_setup           11  6.9    0.000    0.001   10.692   10.693
 make_images_data                  5014 15.6    0.056    0.064    8.441    9.905
 pw_transfer                       1547 11.6    0.087    0.096    9.615    9.656
 cp_fm_diag_elpa                     83 13.4    0.000    0.000    9.602    9.614
 cp_fm_diag_elpa_base                83 14.4    9.359    9.436    9.598    9.611
 hybrid_alltoall_any               5200 16.5    0.844    3.776    8.258    9.475
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    9.392    9.441
 fft_wrap_pw1pw2_140                523 13.2    1.919    1.953    8.289    8.338
 grid_integrate_task_list           128 12.3    7.768    8.197    7.768    8.197
 cp_fm_cholesky_decompose            22 10.9    8.062    8.181    8.062    8.181
 qs_ot_get_derivative_diag           77 12.4    0.002    0.003    8.107    8.164
 dbcsr_mm_accdrv_process          20762 16.1    3.303    4.479    7.222    7.911
 density_rs2pw                      128  9.7    0.006    0.006    6.997    7.909
 wfi_extrapolate                     11  7.9    0.001    0.001    7.527    7.527
 multiply_cannon_metrocomm1       10028 15.6    0.029    0.031    4.246    7.115
 calculate_dm_sparse                128  9.5    0.001    0.001    6.106    6.190
 grid_collocate_task_list           128  9.7    5.473    6.178    5.473    6.178
 mp_alltoall_d11v                  2415 14.1    4.981    5.988    4.981    5.988
 fft3d_ps                          1291 14.7    2.760    2.841    5.858    5.886
 dbcsr_complete_redistribute        395 12.7    2.202    2.248    5.331    5.705
 multiply_cannon_metrocomm4        7521 15.6    0.026    0.029    1.859    5.694
 mp_irecv_dv                      28860 15.9    1.820    5.595    1.820    5.595
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    5.303    5.303
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.252    5.275
 rs_pw_transfer                    1046 11.9    0.012    0.013    4.243    5.196
 mp_allgather_i34                  2507 14.6    1.367    4.685    1.367    4.685
 potential_pw2rs                    128 12.3    0.026    0.026    4.502    4.513
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.069    4.120
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.602    3.948
 copy_fm_to_dbcsr                   209 11.7    0.001    0.002    3.472    3.786
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.625    3.713
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    3.671    3.713
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.691    3.704
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    3.455    3.526
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=173.867000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1460.454545, yerr=35.546036
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.696234E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks               1964048       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3439.8
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               3.073569E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  100280
 MPI messages size (bytes):
  total size                         1.136195E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.330227E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               45208              35089547264
   4194304 < size <= 16777216               44352             379752284160
  16777216 < size                           10104             721350232272
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4002                  59136.
 MP_Allreduce        11082                   1503.
 MP_Sync                87
 MP_Alltoall          1712               36974159.
 MP_SendRecv          1792                 218624.
 MP_ISendRecv         1792                 218624.
 MP_Wait              9802
 MP_ISend             6408                1080322.
 MP_IRecv             6408                1080322.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.037    0.062  301.522  301.530
 qs_mol_dyn_low                       1  2.0    0.003    0.003  300.901  300.926
 qs_forces                           11  3.9    0.005    0.006  300.794  300.797
 qs_energies                         11  4.9    0.001    0.001  291.263  291.269
 scf_env_do_scf                      11  5.9    0.001    0.001  268.183  268.197
 velocity_verlet                     10  3.0    0.002    0.002  217.196  217.205
 scf_env_do_scf_inner_loop          117  6.6    0.004    0.009  142.467  142.469
 init_scf_loop                       11  6.9    0.000    0.000  125.416  125.420
 prepare_preconditioner              11  7.9    0.000    0.000  120.239  120.259
 make_preconditioner                 11  8.9    0.000    0.000  120.238  120.259
 make_full_inverse_cholesky          11  9.9    0.038    0.039   96.131  117.367
 qs_scf_new_mos                     117  7.6    0.001    0.001   92.177   92.278
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   92.176   92.278
 ot_scf_mini                        117  9.6    0.003    0.004   87.232   87.285
 dbcsr_multiply_generic            2507 12.6    0.213    0.225   83.540   83.987
 cp_fm_upper_to_full                105 14.8   52.358   75.470   52.358   75.470
 multiply_cannon                   2507 13.6    0.679    0.726   59.937   60.970
 multiply_cannon_loop              2507 14.6    1.059    1.075   55.903   57.162
 ot_mini                            117 10.6    0.001    0.001   45.299   45.355
 dbcsr_complete_redistribute        395 12.7    4.034    4.123   30.237   43.373
 copy_fm_to_dbcsr                   209 11.7    0.001    0.002   26.634   39.748
 rebuild_ks_matrix                  128  8.3    0.001    0.001   37.944   37.975
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.018   37.943   37.974
 transfer_fm_to_dbcsr                11  9.9    0.030    0.031   24.064   36.968
 mp_alltoall_i22                    716 14.1   21.830   35.088   21.830   35.088
 qs_ks_update_qs_env                128  7.6    0.001    0.001   34.830   34.860
 cp_fm_cholesky_invert               11 10.9   34.261   34.268   34.261   34.268
 mp_waitall_1                    103674 16.8   29.227   33.681   29.227   33.681
 qs_ot_get_p                        128 10.4    0.001    0.001   26.655   26.686
 qs_ot_get_derivative               117 11.6    0.002    0.002   25.366   25.422
 qs_ot_p2m_diag                      83 11.4    0.880    0.884   22.529   22.559
 multiply_cannon_metrocomm3       10028 15.6    0.025    0.027   19.891   21.200
 qs_rho_update_rho_low              128  7.7    0.001    0.001   20.729   20.747
 calculate_rho_elec                 128  8.7    0.481    0.481   20.729   20.747
 cp_dbcsr_syevd                      83 12.4    0.006    0.006   20.667   20.668
 make_m2s                          5014 13.6    0.075    0.077   18.727   20.203
 ot_diis_step                       117 11.6    0.022    0.022   19.891   19.892
 sum_up_and_integrate               128 10.3    0.321    0.324   19.709   19.795
 make_images                       5014 14.6    3.066    3.271   18.249   19.729
 integrate_v_rspace                 128 11.3    0.004    0.004   19.387   19.471
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   19.180   19.273
 apply_single                       128 13.6    0.001    0.001   19.180   19.273
 multiply_cannon_multrec          10028 15.6   10.311   12.121   18.060   18.166
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   17.419   17.420
 cp_fm_diag_elpa_base                83 14.4   12.976   14.608   17.415   17.416
 multiply_cannon_sync_h2d         10028 15.6   14.298   14.336   14.298   14.336
 hybrid_alltoall_any               5200 16.5    1.316    3.038   10.645   12.818
 make_images_data                  5014 15.6    0.064    0.069   10.523   12.714
 init_scf_run                        11  5.9    0.000    0.001   12.652   12.653
 scf_env_initial_rho_setup           11  6.9    0.000    0.000   12.652   12.653
 pw_transfer                       1547 11.6    0.093    0.095   11.760   11.768
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.011   11.525   11.531
 fft_wrap_pw1pw2_140                523 13.2    3.106    3.139   10.198   10.207
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   10.022   10.068
 mp_alltoall_d11v                  2415 14.1    8.340    9.711    8.340    9.711
 wfi_extrapolate                     11  7.9    0.001    0.001    9.504    9.504
 dbcsr_mm_accdrv_process          20762 16.1    4.268    6.178    7.499    9.402
 cp_fm_cholesky_decompose            22 10.9    9.374    9.392    9.374    9.392
 grid_integrate_task_list           128 12.3    8.596    8.798    8.596    8.798
 density_rs2pw                      128  9.7    0.005    0.006    8.166    8.306
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    8.213    8.214
 calculate_dm_sparse                128  9.5    0.001    0.001    6.834    6.914
 fft3d_ps                          1291 14.7    2.853    2.862    6.563    6.604
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.494    6.562
 grid_collocate_task_list           128  9.7    6.440    6.518    6.440    6.518
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    6.299    6.391
 rs_scatter_matrices                139  9.7    3.874    4.842    6.107    6.324
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=301.530000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2759.727273, yerr=155.351853
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.259667E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255646.
 MP_Allreduce         3139                   6114.
 MP_Sync                 4
 MP_Alltoall            54               22988902.
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.033    0.040   84.907   84.908
 qs_energies                          1  2.0    0.000    0.000   84.370   84.377
 ls_scf                               1  3.0    0.000    0.000   83.470   83.478
 dbcsr_multiply_generic             111  6.7    0.014    0.015   72.445   72.606
 multiply_cannon                    111  7.7    0.017    0.020   55.804   57.075
 multiply_cannon_loop               111  8.7    0.227    0.243   52.388   53.814
 ls_scf_main                          1  4.0    0.000    0.000   52.327   52.327
 density_matrix_trs4                  2  5.0    0.002    0.003   46.787   46.861
 ls_scf_init_scf                      1  4.0    0.000    0.001   28.108   28.109
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   26.967   27.019
 mp_waitall_1                     11031 10.9   22.308   25.386   22.308   25.386
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   24.858   24.878
 multiply_cannon_multrec           2664  9.7    8.156    8.918   15.543   17.246
 multiply_cannon_sync_h2d          2664  9.7   13.590   14.932   13.590   14.932
 make_m2s                           222  7.7    0.008    0.011   13.046   13.507
 make_images                        222  8.7    0.099    0.109   13.024   13.488
 multiply_cannon_metrocomm1        2664  9.7    0.010    0.011    9.619   12.318
 make_images_data                   222  9.7    0.004    0.005    7.608    8.182
 hybrid_alltoall_any                227 10.6    0.215    1.835    6.541    8.092
 dbcsr_mm_accdrv_process           4760 10.4    0.586    0.676    7.003    7.960
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.415    7.601
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.217    7.108    6.217    7.108
 calculate_norms                   4752  9.8    5.510    6.233    5.510    6.233
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.007    5.151
 mp_sum_l                           887  5.1    3.065    4.570    3.065    4.570
 make_images_sizes                  222  9.7    0.000    0.000    0.773    3.831
 mp_alltoall_i44                    222 10.7    0.773    3.831    0.773    3.831
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.279    3.531
 multiply_cannon_metrocomm4        2442  9.7    0.012    0.014    2.048    3.259
 arnoldi_extremal                     4  6.8    0.000    0.000    3.225    3.247
 arnoldi_normal_ev                    4  7.8    0.001    0.002    3.224    3.247
 mp_irecv_dv                       6231 10.9    2.032    3.232    2.032    3.232
 build_subspace                      16  8.4    0.009    0.012    3.131    3.133
 ls_scf_post                          1  4.0    0.000    0.000    3.035    3.042
 ls_scf_store_result                  1  5.0    0.000    0.000    2.859    2.893
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.314    2.790
 dbcsr_merge_single_wm              555 10.7    0.456    0.584    2.305    2.781
 make_images_pack                   222  9.7    2.204    2.623    2.206    2.625
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.013    2.334    2.568
 dbcsr_sort_data                    658 11.4    2.107    2.508    2.107    2.508
 dbcsr_matrix_vector_mult_local     304 10.0    2.068    2.458    2.070    2.460
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.318    2.395
 buffer_matrices_ensure_size        222  8.7    1.749    2.043    1.749    2.043
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.778    1.780
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.769    1.770
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    1.769    1.770
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=84.908000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1140.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.161672E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  10075.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.027    0.040   89.772   89.773
 qs_energies                          1  2.0    0.000    0.000   89.327   89.331
 ls_scf                               1  3.0    0.000    0.000   87.992   87.995
 dbcsr_multiply_generic             111  6.7    0.016    0.016   74.005   74.346
 multiply_cannon                    111  7.7    0.027    0.042   52.414   56.440
 ls_scf_main                          1  4.0    0.000    0.000   54.316   54.321
 multiply_cannon_loop               111  8.7    0.135    0.147   49.686   52.963
 density_matrix_trs4                  2  5.0    0.002    0.003   48.575   48.819
 ls_scf_init_scf                      1  4.0    0.000    0.001   29.946   29.948
 mp_waitall_1                      9105 10.9   20.489   29.506   20.489   29.506
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.723   28.835
 multiply_cannon_multrec           1332  9.7   13.418   17.546   22.719   27.955
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.317   26.330
 multiply_cannon_metrocomm3        1332  9.7    0.007    0.008   11.075   19.961
 make_m2s                           222  7.7    0.006    0.008   15.246   15.776
 make_images                        222  8.7    1.370    1.691   15.215   15.746
 dbcsr_mm_accdrv_process           4041 10.4    0.342    0.510    8.897   10.442
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.415    9.913    8.415    9.913
 hybrid_alltoall_any                227 10.6    0.540    2.553    8.304    9.661
 make_images_data                   222  9.7    0.004    0.005    8.895    9.649
 mp_sum_l                           887  5.1    4.878    8.042    4.878    8.042
 multiply_cannon_metrocomm4        1221  9.7    0.007    0.009    3.194    7.688
 mp_irecv_dv                       3311 11.0    3.173    7.631    3.173    7.631
 calculate_norms                   2376  9.8    6.074    6.830    6.074    6.830
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.632    6.361
 multiply_cannon_sync_h2d          1332  9.7    4.957    6.274    4.957    6.274
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.034    5.255
 arnoldi_extremal                     4  6.8    0.000    0.000    4.711    4.729
 arnoldi_normal_ev                    4  7.8    0.001    0.004    4.711    4.729
 build_subspace                      16  8.4    0.014    0.020    4.460    4.463
 ls_scf_post                          1  4.0    0.000    0.000    3.729    3.733
 ls_scf_store_result                  1  5.0    0.000    0.000    3.414    3.555
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.133    3.376
 dbcsr_matrix_vector_mult_local     304 10.0    2.747    3.233    2.749    3.234
 mp_allgather_i34                   111  8.7    0.726    2.918    0.726    2.918
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.614    2.713
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.165    2.514
 dbcsr_data_new                    4174 10.1    2.116    2.398    2.116    2.398
 make_images_pack                   222  9.7    1.821    2.118    1.823    2.120
 dbcsr_sort_data                    436 11.2    1.816    2.059    1.816    2.059
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.867    1.869
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.854    1.857
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    1.854    1.857
 make_images_sizes                  222  9.7    0.000    0.000    0.608    1.856
 mp_alltoall_i44                    222 10.7    0.608    1.856    0.608    1.856
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=89.773000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1804.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.885620E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265448.
 MP_Allreduce         3138                  10896.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.054    0.095   93.711   93.713
 qs_energies                          1  2.0    0.003    0.024   92.800   92.831
 ls_scf                               1  3.0    0.000    0.001   91.360   91.373
 dbcsr_multiply_generic             111  6.7    0.016    0.019   75.319   75.633
 ls_scf_main                          1  4.0    0.000    0.001   56.633   56.638
 multiply_cannon                    111  7.7    0.033    0.077   51.914   56.147
 multiply_cannon_loop               111  8.7    0.117    0.130   49.129   53.123
 density_matrix_trs4                  2  5.0    0.002    0.003   50.703   50.886
 mp_waitall_1                      7281 11.0   23.471   33.285   23.471   33.285
 ls_scf_init_scf                      1  4.0    0.019    0.034   31.063   31.066
 ls_scf_init_matrix_S                 1  5.0    0.001    0.011   29.822   29.911
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.921   26.951
 multiply_cannon_multrec            888  9.7   12.672   15.285   21.243   24.656
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   10.915   22.754
 make_m2s                           222  7.7    0.006    0.008   16.556   17.318
 make_images                        222  8.7    1.584    1.865   16.518   17.276
 make_images_data                   222  9.7    0.004    0.005    9.618   10.790
 hybrid_alltoall_any                227 10.6    0.650    2.951    9.233   10.714
 dbcsr_mm_accdrv_process           3754 10.4    0.299    0.489    8.102    9.341
 mp_sum_l                           887  5.1    5.176    9.339    5.176    9.339
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.667    8.852    7.667    8.852
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.588    7.531
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.027    7.428
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.475    7.216
 mp_irecv_dv                       2335 11.1    2.459    7.165    2.459    7.165
 multiply_cannon_sync_h2d           888  9.7    6.095    7.099    6.095    7.099
 arnoldi_extremal                     4  6.8    0.000    0.000    5.223    5.257
 arnoldi_normal_ev                    4  7.8    0.001    0.004    5.223    5.257
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.746    5.013
 build_subspace                      16  8.4    0.014    0.020    4.766    4.773
 calculate_norms                   1584  9.8    4.345    4.765    4.345    4.765
 mp_allgather_i34                   111  8.7    0.913    3.833    0.913    3.833
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.021    3.449    3.779
 ls_scf_post                          1  4.0    0.001    0.010    3.664    3.676
 dbcsr_matrix_vector_mult_local     304 10.0    3.033    3.600    3.035    3.602
 ls_scf_store_result                  1  5.0    0.000    0.000    3.401    3.483
 ls_scf_dm_to_ks                      2  5.0    0.000    0.001    2.869    2.958
 dbcsr_data_new                    4116  9.9    2.106    2.467    2.106    2.467
 make_images_sizes                  222  9.7    0.000    0.000    1.101    2.223
 mp_alltoall_i44                    222 10.7    1.100    2.223    1.100    2.223
 dbcsr_sort_data                    325 11.1    1.880    2.120    1.880    2.120
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.911    1.913
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.893    1.895
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    1.893    1.895
 make_images_pack                   222  9.7    1.621    1.875    1.624    1.877
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=93.713000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2251.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.337404E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  13030.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.086    0.222   98.407   98.443
 qs_energies                          1  2.0    0.000    0.001   97.295   97.306
 ls_scf                               1  3.0    0.000    0.001   95.105   95.114
 dbcsr_multiply_generic             111  6.7    0.018    0.022   78.808   79.049
 ls_scf_main                          1  4.0    0.000    0.002   59.152   59.153
 multiply_cannon                    111  7.7    0.052    0.145   51.740   56.471
 density_matrix_trs4                  2  5.0    0.002    0.004   52.978   53.079
 multiply_cannon_loop               111  8.7    0.154    0.167   46.755   50.017
 ls_scf_init_scf                      1  4.0    0.000    0.002   32.692   32.695
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   31.455   31.528
 mp_waitall_1                      6369 11.0   22.977   29.010   22.977   29.010
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.002   28.974   28.989
 multiply_cannon_multrec           1332  9.7   14.118   17.533   22.029   25.066
 make_m2s                           222  7.7    0.007    0.009   21.258   22.666
 make_images                        222  8.7    3.139    3.602   21.208   22.619
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.004    9.250   17.540
 make_images_data                   222  9.7    0.004    0.005   11.905   13.424
 hybrid_alltoall_any                227 10.6    0.799    3.791   11.354   12.917
 dbcsr_mm_accdrv_process           3641 10.4    0.303    0.481    7.547    9.098
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.106    8.603    7.106    8.603
 mp_sum_l                           887  5.1    4.404    8.370    4.404    8.370
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.229    6.421
 multiply_cannon_metrocomm4        1110  9.7    0.005    0.007    2.102    6.083
 mp_irecv_dv                       3229 10.9    2.076    5.995    2.076    5.995
 multiply_cannon_sync_h2d          1332  9.7    5.439    5.980    5.439    5.980
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.653    5.408
 arnoldi_extremal                     4  6.8    0.000    0.000    5.233    5.254
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.233    5.254
 build_subspace                      16  8.4    0.014    0.020    4.899    4.909
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.552    4.714
 calculate_norms                   2376  9.8    4.175    4.511    4.175    4.511
 mp_allgather_i34                   111  8.7    2.101    4.325    2.101    4.325
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.021    3.590    3.898
 dbcsr_matrix_vector_mult_local     304 10.0    3.209    3.708    3.212    3.710
 dbcsr_sort_data                    658 11.4    3.136    3.447    3.136    3.447
 ls_scf_post                          1  4.0    0.000    0.001    3.261    3.267
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.879    3.199
 dbcsr_merge_single_wm              555 10.7    0.539    0.650    2.870    3.191
 ls_scf_dm_to_ks                      2  5.0    0.000    0.001    3.058    3.092
 ls_scf_store_result                  1  5.0    0.000    0.000    3.006    3.066
 dbcsr_data_release               10477 10.7    1.585    2.401    1.585    2.401
 qs_energies_init_hamiltonians        1  3.0    0.001    0.002    2.179    2.179
 dbcsr_finalize                     304  7.8    0.049    0.061    1.810    1.996
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=98.443000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2704.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.731859E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265536.
 MP_Allreduce         3129                  15263.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.044    0.055   92.657   92.658
 qs_energies                          1  2.0    0.000    0.000   91.544   91.550
 ls_scf                               1  3.0    0.000    0.000   89.610   89.615
 dbcsr_multiply_generic             111  6.7    0.018    0.019   70.882   71.075
 ls_scf_main                          1  4.0    0.000    0.000   56.750   56.751
 multiply_cannon                    111  7.7    0.090    0.190   52.469   56.078
 multiply_cannon_loop               111  8.7    0.088    0.093   49.878   51.417
 density_matrix_trs4                  2  5.0    0.002    0.003   49.677   49.749
 ls_scf_init_scf                      1  4.0    0.000    0.001   29.466   29.467
 mp_waitall_1                      5436 11.0   24.400   28.998   24.400   28.998
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.191   28.218
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.078   26.089
 multiply_cannon_multrec            444  9.7   13.764   16.264   20.834   23.072
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   10.671   15.691
 make_m2s                           222  7.7    0.005    0.005   13.668   14.636
 make_images                        222  8.7    2.038    2.475   13.601   14.568
 multiply_cannon_metrocomm3         444  9.7    0.001    0.002    6.157   14.295
 hybrid_alltoall_any                227 10.6    0.802    3.835    8.175    9.835
 make_images_data                   222  9.7    0.003    0.004    8.388    9.697
 multiply_cannon_sync_h2d           444  9.7    6.711    8.064    6.711    8.064
 dbcsr_mm_accdrv_process           3003 10.4    0.363    0.401    6.763    7.884
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.400    7.489    6.400    7.489
 arnoldi_extremal                     4  6.8    0.000    0.000    5.956    5.969
 arnoldi_normal_ev                    4  7.8    0.001    0.004    5.956    5.969
 build_subspace                      16  8.4    0.015    0.019    5.558    5.569
 mp_sum_l                           887  5.1    2.736    4.946    2.736    4.946
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.442    4.608
 dbcsr_matrix_vector_mult           304  9.0    0.011    0.021    4.232    4.467
 dbcsr_matrix_vector_mult_local     304 10.0    3.732    4.212    3.735    4.214
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    1.948    3.985
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.550    3.864
 mp_irecv_dv                       1241 11.2    1.536    3.837    1.536    3.837
 calculate_norms                    792  9.8    3.623    3.777    3.623    3.777
 mp_allgather_i34                   111  8.7    1.172    3.694    1.172    3.694
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.599    3.666
 ls_scf_post                          1  4.0    0.000    0.000    3.394    3.399
 make_images_sizes                  222  9.7    0.000    0.000    0.897    3.392
 mp_alltoall_i44                    222 10.7    0.897    3.391    0.897    3.391
 ls_scf_store_result                  1  5.0    0.000    0.000    3.177    3.221
 dbcsr_data_new                    4608  9.7    1.793    2.297    1.793    2.297
 dbcsr_finalize                     304  7.8    0.062    0.077    2.199    2.278
 dbcsr_merge_all                    275  8.9    0.478    0.528    2.058    2.119
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.088    2.090
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.054    2.056
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    2.054    2.056
 qs_energies_init_hamiltonians        1  3.0    0.000    0.001    1.918    1.918
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=92.658000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3737.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.877089E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284089.
 MP_Allreduce         3123                  21388.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.081    0.094  105.985  105.985
 qs_energies                          1  2.0    0.000    0.000  104.601  104.609
 ls_scf                               1  3.0    0.000    0.000  101.638  101.646
 dbcsr_multiply_generic             111  6.7    0.024    0.027   75.056   75.210
 ls_scf_main                          1  4.0    0.000    0.000   64.106   64.106
 density_matrix_trs4                  2  5.0    0.002    0.003   55.044   55.118
 multiply_cannon                    111  7.7    0.134    0.226   48.778   50.881
 multiply_cannon_loop               111  8.7    0.099    0.100   45.819   46.271
 ls_scf_init_scf                      1  4.0    0.001    0.001   33.729   33.729
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   32.157   32.171
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   29.443   29.452
 mp_waitall_1                      4527 11.1   22.199   25.969   22.199   25.969
 make_m2s                           222  7.7    0.005    0.005   22.652   23.716
 make_images                        222  8.7    3.567    3.860   22.544   23.608
 multiply_cannon_multrec            444  9.7   17.886   18.494   22.524   23.161
 hybrid_alltoall_any                227 10.6    1.654    3.636   12.821   15.718
 make_images_data                   222  9.7    0.003    0.004   13.049   15.165
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.547   11.098
 multiply_cannon_sync_h2d           444  9.7    8.794    8.837    8.794    8.837
 arnoldi_extremal                     4  6.8    0.000    0.000    7.535    7.546
 arnoldi_normal_ev                    4  7.8    0.002    0.008    7.534    7.546
 build_subspace                      16  8.4    0.026    0.036    6.989    7.002
 dbcsr_matrix_vector_mult           304  9.0    0.017    0.033    5.612    5.768
 dbcsr_matrix_vector_mult_local     304 10.0    5.123    5.437    5.126    5.439
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    5.161    5.252
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.917    5.170
 dbcsr_mm_accdrv_process           1814 10.4    0.300    0.392    4.465    4.589
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.118    4.247    4.118    4.247
 ls_scf_post                          1  4.0    0.000    0.000    3.803    3.811
 make_images_sizes                  222  9.7    0.000    0.000    1.483    3.677
 mp_alltoall_i44                    222 10.7    1.482    3.676    1.482    3.676
 ls_scf_store_result                  1  5.0    0.000    0.000    3.518    3.558
 mp_allgather_i34                   111  8.7    1.087    3.544    1.087    3.544
 calculate_norms                    792  9.8    3.228    3.268    3.228    3.268
 dbcsr_finalize                     304  7.8    0.082    0.089    3.082    3.167
 dbcsr_merge_all                    275  8.9    0.883    0.920    2.854    2.945
 qs_energies_init_hamiltonians        1  3.0    0.001    0.001    2.933    2.933
 dbcsr_complete_redistribute          5  7.6    1.427    1.466    2.743    2.866
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.379    2.516
 dbcsr_sort_data                    325 11.1    2.438    2.497    2.438    2.497
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.470    2.472
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.405    2.406
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    2.405    2.406
 dbcsr_data_new                    6591  9.6    1.873    2.350    1.873    2.350
 dbcsr_new_transposed                 4  7.5    0.243    0.255    2.309    2.320
 dbcsr_frobenius_norm                74  6.6    2.058    2.134    2.199    2.224
 dbcsr_add_d                        103  6.2    0.000    0.000    2.138    2.208
 dbcsr_add_anytype                  103  7.2    0.859    0.891    2.138    2.208
 dbcsr_data_release               12724 10.6    1.993    2.189    1.993    2.189
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=105.985000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=7038.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/8ce3b69bbf0a9b31bf3df725b357dfb78f348a44_performance_tests/27/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32        7009386627072       0.0%      0.0%    100.0%
 flops     9 x     9 x    32        7335108845568       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        9866241589248       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        9884108906496       0.0%      0.0%    100.0%
 flops    22 x    22 x    32       13354440523776       0.0%      0.0%    100.0%
 flops    32 x    32 x     9       20607185977344       0.0%      0.0%    100.0%
 flops    32 x    32 x    22       25186560638976       0.0%      0.0%    100.0%
 flops     9 x    32 x    32       28458319085568       0.0%      0.0%    100.0%
 flops    22 x    32 x    32       34782389993472       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       42881542373376       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       55680402235392       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       55680402235392       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       72328573419520       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       383.054662E+12       0.0%      0.0%    100.0%
 flops max/rank                    733.641090E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                        26899403712       0.0%      0.0%    100.0%
 number of processed stacks             118860288       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     226.3
 marketing flops                   780.439111E+12
 -------------------------------------------------------------------------------
 # multiplications                           1445
 max memory usage/rank             592.994304E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged               102097920
 MPI messages size (bytes):
  total size                        37.227590E+12
  min size                           0.000000E+00
  max size                           4.551360E+06
  average size                     364.626312E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              731472                        0
       128 < size <=     8192            11922720              97670922240
      8192 < size <=    32768            24718992             614677610496
     32768 < size <=   131072            20000256            1970081366016
    131072 < size <=  4194304            42515668           24886801223040
   4194304 < size <= 16777216             2208812            9656099886720
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4640                  78072.
 MP_Allreduce        13232                   2081.
 MP_Sync              1064
 MP_Alltoall          2588                4057257.
 MP_SendRecv        168740                  11136.
 MP_ISendRecv        92040                  11136.
 MP_Wait            102830
 MP_comm_split          40
 MP_ISend            26090                  85106.
 MP_IRecv            37890                  59644.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.032    0.104  239.256  239.258
 qs_mol_dyn_low                       1  2.0    0.006    0.020  238.168  238.191
 qs_forces                            5  3.8    0.007    0.072  238.009  238.019
 qs_energies                          5  4.8    0.002    0.030  234.760  234.786
 scf_env_do_scf                       5  5.8    0.000    0.001  219.877  219.879
 scf_env_do_scf_inner_loop          105  6.6    0.004    0.018  191.148  191.160
 qs_scf_new_mos                     105  7.6    0.000    0.001  147.879  148.020
 qs_scf_loop_do_ot                  105  8.6    0.001    0.001  147.879  148.019
 ot_scf_mini                        105  9.6    0.003    0.004  137.796  137.920
 dbcsr_multiply_generic            1445 12.2    0.125    0.132  135.012  135.760
 multiply_cannon                   1445 13.2    0.276    0.291  115.186  117.673
 multiply_cannon_loop              1445 14.2    2.857    3.031  113.318  115.785
 velocity_verlet                      4  3.0    0.003    0.011  110.618  110.621
 ot_mini                            105 10.6    0.001    0.007   61.279   61.434
 qs_ot_get_p                        112 10.4    0.001    0.003   46.250   46.648
 mp_waitall_1                    488190 16.1   36.605   44.637   36.605   44.637
 multiply_cannon_multrec          69360 15.2   29.489   34.398   39.262   44.410
 qs_ot_get_derivative                55 11.6    0.001    0.002   39.421   39.611
 qs_ot_p2m_diag                      40 11.0    0.020    0.031   34.670   34.780
 multiply_cannon_metrocomm3       69360 15.2    0.211    0.222   26.723   34.270
 multiply_cannon_sync_h2d         69360 15.2   28.853   32.359   28.853   32.359
 cp_dbcsr_syevd                      40 12.0    0.002    0.003   31.433   31.434
 rebuild_ks_matrix                  110  8.4    0.000    0.000   30.717   30.888
 qs_ks_build_kohn_sham_matrix       110  9.4    0.012    0.022   30.717   30.888
 init_scf_loop                        7  6.6    0.001    0.019   28.695   28.698
 qs_ks_update_qs_env                112  7.6    0.001    0.001   28.180   28.337
 cp_fm_syevd                         40 13.0    0.000    0.002   26.193   26.365
 prepare_preconditioner               7  7.6    0.000    0.005   23.394   23.427
 make_preconditioner                  7  8.6    0.000    0.006   23.394   23.427
 apply_preconditioner_dbcsr          62 12.6    0.000    0.000   23.085   23.300
 apply_single                        62 13.6    0.000    0.000   23.085   23.300
 cp_fm_redistribute_end              40 14.0   10.702   21.347   10.707   21.349
 cp_fm_syevd_base                    40 14.0   10.634   21.281   10.634   21.281
 ot_new_cg_direction                 55 11.6    0.001    0.004   21.117   21.117
 qs_rho_update_rho_low              110  7.6    0.001    0.001   18.931   19.350
 calculate_rho_elec                 110  8.6    0.030    0.033   18.930   19.349
 make_full_inverse_cholesky           7  9.6    0.000    0.001   15.957   16.024
 qs_ot_get_orbitals                 105 10.6    0.001    0.001   14.996   15.241
 rs_pw_transfer                     690 11.5    0.012    0.048   13.040   14.451
 qs_ot_get_derivative_taylor         37 12.8    0.001    0.001   14.193   14.280
 density_rs2pw                      110  9.6    0.006    0.007   12.756   14.196
 mp_sum_l                          4764 12.2   12.406   13.625   12.406   13.625
 pw_transfer                       1645 12.4    0.082    0.102   13.300   13.515
 fft_wrap_pw1pw2                   1425 13.5    0.013    0.016   13.159   13.378
 init_scf_run                         5  5.8    0.000    0.001   12.103   12.104
 scf_env_initial_rho_setup            5  6.8    0.000    0.002   12.103   12.104
 calculate_dm_sparse                110  9.5    0.000    0.001   11.653   11.762
 fft_wrap_pw1pw2_240                915 15.0    1.178    1.268   11.279   11.462
 qs_vxc_create                      110 10.4    0.003    0.007   11.138   11.180
 cp_fm_cholesky_invert                7 10.6   10.869   10.878   10.869   10.878
 qs_ot_get_derivative_diag           18 12.0    0.000    0.000   10.648   10.703
 dbcsr_mm_accdrv_process         154766 15.8    6.161    6.375    9.642   10.481
 check_diag                          80 13.5    8.596    8.898    9.654    9.806
 fft3d_pb                           915 16.0    2.376    2.601    9.363    9.549
 sum_up_and_integrate                60 10.3    0.028    0.037    9.411    9.434
 integrate_v_rspace                  60 11.3    0.002    0.003    9.383    9.407
 multiply_cannon_metrocomm1       69360 15.2    0.100    0.107    4.902    9.003
 acc_transpose_blocks             69360 15.2    0.356    0.376    7.558    7.988
 xc_rho_set_and_dset_create         110 12.4    0.077    0.099    7.736    7.980
 make_m2s                          2890 13.2    0.077    0.086    6.970    7.565
 xc_vxc_pw_create                    60 11.3    0.039    0.049    7.504    7.546
 make_images                       2890 14.2    0.241    0.261    6.865    7.460
 cp_dbcsr_sm_fm_multiply             15  9.3    0.001    0.003    7.396    7.413
 make_full_single_inverse             7  9.6    0.001    0.004    7.133    7.166
 calculate_first_density_matrix       1  7.0    0.001    0.006    6.958    6.977
 cp_dbcsr_sm_fm_multiply_core        15 10.3    0.000    0.001    6.903    6.950
 xc_pw_derive                       510 13.4    0.005    0.006    6.828    6.903
 mp_alltoall_z22v                  2340 17.7    6.490    6.802    6.490    6.802
 mp_waitany                        7680 13.5    4.602    6.167    4.602    6.167
 potential_pw2rs                     60 12.3    0.003    0.003    5.465    5.520
 acc_transpose_blocks_kernels     69360 16.2    0.858    0.906    4.794    5.084
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="601", plot="h2o_512_md", label="(64n/12r/1t)", y=239.258000, yerr=0.000000
PlotPoint: name="602", plot="h2o_512_md_mem", label="(64n/12r/1t)", y=562.600000, yerr=3.322650
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: 8ce3b69bbf0a9b31bf3df725b357dfb78f348a44
Summary: empty
Status: OK