=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1, Cray-FFTW 3.3.8.10,
#              COSMA 2.6.2, ELPA 2022.11.001, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.1.0, LIBVORI 220621, LIBXSMM 1.17, PLUMED 2.8.1,
#              SIRIUS 7.4.3, SPGLIB 1.16.2
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed as default.
#        Replace or adapt the "module add" commands below if needed.
#
# Author: Matthias Krack (03.02.2023)
#
# Structure of this file: it is read both by bash (when sourced) and by make
# (as the arch file). The setup section below starts with a comment line that
# ends in a backslash, and every following line of the section ends in a
# backslash as well. For make these lines are joined into one long comment, so
# the whole setup section is skipped. For bash only the first of these lines
# is a comment and the commands after it are executed. The closing "return"
# ends the sourced script before bash reaches the make variable definitions.
#
# The setup section refuses to run unless the file is sourced, accepts an
# optional first argument that replaces the default GCC version (9.3.0),
# loads and saves the module set, runs the toolchain installer, sources the
# generated setup file, and finally prints the suggested make commands.
#
# NOTE(review): ${maxtasks} is not set anywhere in this file - presumably it
# is expected from the caller's environment; confirm that an empty value is
# acceptable to install_cp2k_toolchain.sh.
#
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   cd tools/toolchain; \
   ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j${maxtasks} --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed; \
   cd ../..; \
   printf "Sourcing ${PWD}/tools/toolchain/install/setup ... "; \
   source ${PWD}/tools/toolchain/install/setup; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   return

# Set options
# General build switches
DO_CHECKS      := no
USE_ACC        := yes

# GPU model and offload backend
GPUVER         := P100
OFFLOAD_TARGET := cuda

# Versions of the optional packages. The value is used to build the package
# path below the toolchain install directory; a package whose variable is
# empty or commented out (like QUIP here) is not configured by this file.
USE_COSMA      := 2.6.2
USE_ELPA       := 2022.11.001
USE_LIBINT     := 2.6.0
USE_LIBPEXSI   := 1.2.0
USE_LIBVORI    := 220621
USE_LIBXC      := 6.1.0
USE_LIBXSMM    := 1.17
USE_PLUMED     := 2.8.1
#USE_QUIP       := 0.9.10
USE_SIRIUS     := 7.4.3
USE_SPGLIB     := 1.16.2

# Versions of the packages that are only needed for SIRIUS
HDF5_VER       := 1.12.0
LIBVDWXC_VER   := 0.4.0
SPFFT_VER      := 1.0.6
SPLA_VER       := 1.5.4

# Versions of the packages that are only needed for LIBPEXSI
SCOTCH_VER     := 6.0.0
SUPERLU_VER    := 6.1.0

# LMAX is part of the LIBINT install directory name; MAX_CONTR is handed to
# the preprocessor as __MAX_CONTR
LMAX           := 5
MAX_CONTR      := 4

# Compiler, linker and archiver commands
CC             := cc
CXX            := CC
FC             := ftn
LD             := ftn
OFFLOAD_CC     := nvcc
AR             := ar -r

# Root directory of the toolchain installation used for all optional packages
INSTALL_PATH   := $(PWD)/tools/toolchain/install

# No -march flag is set here because cc, CC, and ftn already supply the proper one
CFLAGS         := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g

# Preprocessor definitions that are always active
DFLAGS         := -D__parallel -D__SCALAPACK -D__FFTW3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# Extra definition that is only added for DO_CHECKS=yes
ifeq (yes,$(DO_CHECKS))
   DFLAGS         += -D__CHECK_DIAG
endif

# Accelerator support: DBCSR and CUDA offload are switched on, whereas the PW
# offload is switched off (possibly no performance gain with PW_CUDA currently)
ifeq (yes,$(USE_ACC))
   DFLAGS         += -D__DBCSR_ACC -D__OFFLOAD_CUDA -D__NO_OFFLOAD_PW
endif

# PLUMED (static library); enabling it also selects GSL 2.7, which is
# configured by the USE_GSL section of this file
ifneq (,$(USE_PLUMED))
   USE_PLUMED     := $(strip $(USE_PLUMED))
   USE_GSL        := 2.7
   DFLAGS         += -D__PLUMED2
   PLUMED_LIB     := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   LIBS           += $(PLUMED_LIB)/libplumed.a
endif

# ELPA (static library).
# The ELPA build is looked up in a target-specific subdirectory of the ELPA
# installation. The GPU build ("nvidia") and its -D__ELPA_NVIDIA_GPU define are
# selected only when acceleration is enabled (USE_ACC = yes), so that ELPA
# follows the same switch as the SIRIUS and COSMA settings of this file instead
# of always using the GPU build.
ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
   ifeq ($(USE_ACC), yes)
      TARGET         := nvidia
   else
      # NOTE(review): assumes the toolchain also installed a "cpu" ELPA build
      # next to the "nvidia" one - confirm in the install directory
      TARGET         := cpu
   endif
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   DFLAGS         += -D__ELPA
   ifeq ($(TARGET), nvidia)
      DFLAGS         += -D__ELPA_NVIDIA_GPU
   endif
   LIBS           += $(ELPA_LIB)/libelpa.a
endif

# QUIP (static libraries); only active when USE_QUIP is set to a version
ifneq (,$(USE_QUIP))
   USE_QUIP       := $(strip $(USE_QUIP))
   QUIP_LIB       := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   QUIP_INC       := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   DFLAGS         += -D__QUIP
   CFLAGS         += -I$(QUIP_INC)
   # The libraries are appended in this order to the link line
   LIBS           += $(addprefix $(QUIP_LIB)/,libquip_core.a libatoms.a libFoX_sax.a libFoX_common.a libFoX_utils.a libFoX_fsys.a)
endif

# PEXSI together with the SuperLU_DIST and Scotch installations it is linked with
ifneq (,$(USE_LIBPEXSI))
   USE_LIBPEXSI   := $(strip $(USE_LIBPEXSI))
   SCOTCH_VER     := $(strip $(SCOTCH_VER))
   SUPERLU_VER    := $(strip $(SUPERLU_VER))
   # Header directories
   LIBPEXSI_INC   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   SCOTCH_INC     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   SUPERLU_INC    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
   # Library directories
   LIBPEXSI_LIB   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
   SCOTCH_LIB     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib
   SUPERLU_LIB    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib
   DFLAGS         += -D__LIBPEXSI
   CFLAGS         += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC)
   # Link order: PEXSI, then SuperLU_DIST, then the Scotch libraries
   LIBS           += $(LIBPEXSI_LIB)/libpexsi.a $(SUPERLU_LIB)/libsuperlu_dist.a
   LIBS           += $(addprefix $(SCOTCH_LIB)/,libptscotchparmetis.a libptscotch.a libptscotcherr.a libscotchmetis.a libscotch.a)
endif

# LIBVORI (static library, no include directory is added)
ifneq (,$(USE_LIBVORI))
   USE_LIBVORI    := $(strip $(USE_LIBVORI))
   DFLAGS         += -D__LIBVORI
   LIBVORI_LIB    := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   LIBS           += $(LIBVORI_LIB)/libvori.a
endif

# LIBXC: the Fortran 2003 interface library is listed before the core library
ifneq (,$(USE_LIBXC))
   USE_LIBXC      := $(strip $(USE_LIBXC))
   LIBXC_LIB      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   LIBXC_INC      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   DFLAGS         += -D__LIBXC
   CFLAGS         += -I$(LIBXC_INC)
   LIBS           += $(LIBXC_LIB)/libxcf03.a $(LIBXC_LIB)/libxc.a
endif

# LIBINT: the install directory name contains both the version and LMAX
ifneq (,$(USE_LIBINT))
   USE_LIBINT     := $(strip $(USE_LIBINT))
   LMAX           := $(strip $(LMAX))
   LIBINT_LIB     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   LIBINT_INC     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   DFLAGS         += -D__LIBINT
   CFLAGS         += -I$(LIBINT_INC)
   LIBS           += $(LIBINT_LIB)/libint2.a
endif

# SPGLIB (static library libsymspg.a)
ifneq (,$(USE_SPGLIB))
   USE_SPGLIB     := $(strip $(USE_SPGLIB))
   SPGLIB_LIB     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   SPGLIB_INC     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   DFLAGS         += -D__SPGLIB
   CFLAGS         += -I$(SPGLIB_INC)
   LIBS           += $(SPGLIB_LIB)/libsymspg.a
endif

# LIBXSMM: libxsmmf.a is listed before libxsmm.a on the link line
ifneq (,$(USE_LIBXSMM))
   USE_LIBXSMM    := $(strip $(USE_LIBXSMM))
   LIBXSMM_LIB    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   LIBXSMM_INC    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   DFLAGS         += -D__LIBXSMM
   CFLAGS         += -I$(LIBXSMM_INC)
   LIBS           += $(LIBXSMM_LIB)/libxsmmf.a $(LIBXSMM_LIB)/libxsmm.a
endif

# SIRIUS together with HDF5, libvdwxc, SpFFT and SpLA
ifneq (,$(USE_SIRIUS))
   # Normalize all version strings first
   USE_SIRIUS     := $(strip $(USE_SIRIUS))
   HDF5_VER       := $(strip $(HDF5_VER))
   LIBVDWXC_VER   := $(strip $(LIBVDWXC_VER))
   SPFFT_VER      := $(strip $(SPFFT_VER))
   SPLA_VER       := $(strip $(SPLA_VER))
   # Directories that do not depend on the acceleration setting
   HDF5_LIB       := $(INSTALL_PATH)/hdf5-$(HDF5_VER)/lib
   LIBVDWXC_INC   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SPFFT_INC      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
   SPLA_INC       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla
   # With acceleration the cuda subdirectories of SpFFT, SpLA and SIRIUS are
   # used and -D__OFFLOAD_GEMM is defined; otherwise the plain directories
   ifeq (yes,$(USE_ACC))
      DFLAGS         += -D__OFFLOAD_GEMM
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/cuda
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib/cuda
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib/cuda
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib/cuda
   else
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
   endif
   CFLAGS         += -I$(LIBVDWXC_INC) -I$(SPFFT_INC) -I$(SPLA_INC) -I$(SIRIUS_INC)
   DFLAGS         += -D__HDF5 -D__LIBVDWXC -D__SPFFT -D__SPLA -D__SIRIUS
   # Link order: SIRIUS, SpLA, SpFFT, libvdwxc, HDF5
   LIBS           += $(SIRIUS_LIB)/libsirius.a \
                     $(SPLA_LIB)/libspla.a \
                     $(SPFFT_LIB)/libspfft.a \
                     $(LIBVDWXC_LIB)/libvdwxc.a \
                     $(HDF5_LIB)/libhdf5.a
endif

# COSMA: with acceleration enabled the "-cuda" variant of the installation is used
ifneq (,$(USE_COSMA))
   USE_COSMA      := $(strip $(USE_COSMA))
   ifeq (yes,$(USE_ACC))
      USE_COSMA      := $(USE_COSMA)-cuda
   endif
   COSMA_LIB      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   COSMA_INC      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   DFLAGS         += -D__COSMA
   CFLAGS         += -I$(COSMA_INC)
   # The libraries are appended in this order to the link line
   LIBS           += $(addprefix $(COSMA_LIB)/,libcosma_prefixed_pxgemm.a libcosma.a libcosta_prefixed_scalapack.a libcosta.a libTiled-MM.a)
endif

# GSL (static library); USE_GSL is set by the PLUMED section of this file
ifneq (,$(USE_GSL))
   USE_GSL        := $(strip $(USE_GSL))
   GSL_LIB        := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   GSL_INC        := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   DFLAGS         += -D__GSL
   CFLAGS         += -I$(GSL_INC)
   LIBS           += $(GSL_LIB)/libgsl.a
endif

# All preprocessor definitions collected so far become part of the C flags
CFLAGS         += $(DFLAGS)

# The C++ and the offload compiler flags are derived from the C flags and the
# definitions, respectively
CXXFLAGS       := $(CFLAGS) -std=c++11
OFFLOAD_FLAGS  := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# The Fortran flags start from the C flags; -fallow-argument-mismatch is only
# added when the major version printed by "gcc -dumpversion" is greater than 9
FCFLAGS        := $(CFLAGS)
ifeq (yes,$(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes))
   FCFLAGS        += -fallow-argument-mismatch
endif
FCFLAGS        += -fbacktrace -ffree-form -ffree-line-length-none -fno-omit-frame-pointer -std=f2008

# The linker flags repeat the Fortran flags; the CUDA library directory is
# added as search path and rpath only if CUDA_HOME is set
LDFLAGS        := $(FCFLAGS)
ifneq (,$(CUDA_HOME))
   CUDA_LIB       := $(CUDA_HOME)/lib64
   LDFLAGS        += -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA libraries followed by the system libraries
LIBS           += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt
LIBS           += -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/01
 job id: 44689621
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/02
 job id: 44689622
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/03
 job id: 44689623
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/04
 job id: 44689624
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/05
 job id: 44689625
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/06
 job id: 44689626
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/07
 job id: 44689628
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/08
 job id: 44689629
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/09
 job id: 44689630
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/10
 job id: 44689631
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/11
 job id: 44689632
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/12
 job id: 44689633
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/13
 job id: 44689634
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/14
 job id: 44689635
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/15
 job id: 44689636
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/16
 job id: 44689637
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/17
 job id: 44689638
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/18
 job id: 44689639
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/19
 job id: 44689640
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/20
 job id: 44689641
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/21
 job id: 44689642
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/22
 job id: 44689643
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/23
 job id: 44689644
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/24
 job id: 44689645
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/25
 job id: 44689646
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/26
 job id: 44689647
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          344                      9.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.021    0.035  133.699  133.700
 farming_run                          1  2.0  133.246  133.247  133.670  133.674
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.456660E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              229                1108280.
 MP_Allreduce          485                2282278.
 MP_Sync                27
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split           8
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.010    0.025  115.530  115.530
 qs_energies                          1  2.0    0.000    0.000  115.327  115.328
 mp2_main                             1  3.0    0.000    0.000  113.408  113.409
 mp2_gpw_main                         1  4.0    0.020    0.026  112.569  112.570
 mp2_ri_gpw_compute_in                1  5.0    0.173    0.187   93.717   94.157
 mp2_ri_gpw_compute_in_loop           1  6.0    0.004    0.005   55.487   55.929
 mp2_eri_3c_integrate_gpw           272  7.0    0.154    0.174   41.815   47.217
 get_2c_integrals                     1  6.0    0.000    0.001   37.367   38.043
 integrate_v_rspace                 273  8.0    0.434    0.448   25.171   30.277
 pw_transfer                       6555 10.6    0.375    0.397   27.404   28.121
 fft_wrap_pw1pw2                   5465 11.4    0.045    0.048   26.097   26.674
 grid_integrate_task_list           273  9.0   20.984   26.578   20.984   26.578
 fft_wrap_pw1pw2_100               2178 12.4    1.179    1.396   23.612   24.204
 compute_2c_integrals                 1  7.0    0.002    0.003   19.646   19.648
 compute_2c_integrals_loop_lm         1  8.0    0.003    0.004   18.856   19.375
 mp2_eri_2c_integrate_gpw             1  9.0    2.380    2.468   18.853   19.373
 rpa_ri_compute_en                    1  5.0    0.001    0.001   18.744   18.968
 cp_fm_cholesky_decompose            12  8.2   17.668   18.330   17.668   18.330
 cholesky_decomp                      1  7.0    0.000    0.000   16.566   17.231
 fft3d_s                           5443 13.4   16.163   16.639   16.185   16.659
 ao_to_mo_and_store_B_mult_1        272  7.0   10.843   15.562   10.843   15.562
 calculate_wavefunction             272  8.0    5.421    5.580   12.568   13.133
 rpa_num_int                          1  6.0    0.000    0.001   10.578   10.578
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.536   10.578
 calc_mat_Q                           8  8.0    0.000    0.000    9.385    9.492
 contract_S_to_Q                      8  9.0    0.000    0.000    8.804    8.913
 calc_potential_gpw                 544  9.5    0.005    0.006    8.262    8.606
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.002    8.209    8.512
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.389    8.468
 parallel_gemm_fm_cosma              14 10.1    8.389    8.468    8.389    8.468
 potential_pw2rs                    545 10.0    0.108    0.110    7.672    8.318
 collocate_single_gaussian          272 10.0    0.039    0.042    7.446    7.761
 create_integ_mat                     1  6.0    0.006    0.008    7.630    7.630
 array2fm                             1  7.0    0.000    0.000    6.630    7.086
 pw_scatter_s                      2720 13.7    4.462    4.619    4.462    4.619
 pw_gather_s                       2722 13.2    3.880    4.214    3.880    4.214
 array2fm_buffer_send                 1  8.0    2.974    3.131    2.974    3.131
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=112.569830, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2729.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          344                     10.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.026    0.038  395.489  395.489
 farming_run                          1  2.0  394.851  394.855  395.454  395.456
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827128576       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788821       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.223098E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              704                 407793.
 MP_Allreduce         1821                  23730.
 MP_Sync                38
 MP_Alltoall            77                 975921.
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.049  209.449  209.450
 qs_energies                          1  2.0    0.000    0.000  209.215  209.231
 scf_env_do_scf                       1  3.0    0.000    0.000  106.758  106.758
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  105.889  105.897
 rebuild_ks_matrix                    4  6.0    0.000    0.000  105.888  105.896
 qs_ks_build_kohn_sham_matrix         4  7.0    0.055    0.063  105.888  105.896
 hfx_ks_matrix                        4  8.0    0.001    0.001  105.501  105.506
 integrate_four_center                4  9.0    0.144    0.452  105.501  105.506
 mp2_main                             1  3.0    0.000    0.000  102.168  102.184
 mp2_gpw_main                         1  4.0    0.034    0.059  101.320  101.336
 integrate_four_center_main           4 10.0    0.088    0.493   96.930   99.803
 integrate_four_center_bin          264 11.0   96.842   99.792   96.842   99.792
 init_scf_loop                        1  4.0    0.000    0.000   92.429   92.429
 mp2_ri_gpw_compute_in                1  5.0    0.065    0.072   74.798   75.894
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.002   54.488   55.582
 mp2_eri_3c_integrate_gpw            91  7.0    0.143    0.162   42.114   47.206
 integrate_v_rspace                  95  8.0    0.397    0.569   28.495   33.585
 pw_transfer                       2240 10.6    0.143    0.159   29.883   30.299
 ao_to_mo_and_store_B_mult_1         91  7.0   10.686   29.829   10.686   29.829
 fft_wrap_pw1pw2                   1868 11.4    0.018    0.020   28.903   29.302
 grid_integrate_task_list            95  9.0   23.787   29.045   23.787   29.045
 mp2_ri_gpw_compute_en                1  5.0    0.056    0.073   26.372   28.079
 fft_wrap_pw1pw2_100                730 12.4    1.258    1.384   26.599   27.012
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.837    1.911   24.622   24.634
 get_2c_integrals                     1  6.0    0.001    0.009   20.207   20.248
 compute_2c_integrals                 1  7.0    0.003    0.004   19.189   19.196
 compute_2c_integrals_loop_lm         1  8.0    0.002    0.008   18.822   19.067
 mp2_eri_2c_integrate_gpw             1  9.0    1.748    1.847   18.820   19.066
 fft3d_s                           1823 13.4   18.430   18.702   18.443   18.714
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.000   14.327   14.327
 calculate_wavefunction              91  8.0    2.023    2.051    9.739    9.945
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.556    0.579    8.715    9.215
 potential_pw2rs                    186 10.0    0.033    0.035    8.604    9.169
 local_gemm                         172  8.0    8.159    8.647    8.159    8.647
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.001    8.210    8.568
 calc_potential_gpw                 182  9.5    0.002    0.002    7.909    8.131
 collocate_single_gaussian           91 10.0    0.017    0.022    7.860    8.127
 mp2_ri_gpw_compute_en_comm          22  7.0    0.500    0.527    7.681    8.065
 mp_sync                             38 10.4    3.264    6.582    3.264    6.582
 mp2_ri_gpw_compute_en_ener         172  7.0    6.344    6.422    6.344    6.422
 mp_sendrecv_dm3                   2068  8.0    5.715    6.095    5.715    6.095
 pw_gather_s                        912 13.2    4.888    5.337    4.888    5.337
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=101.324485, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1510.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             452.546560E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62385.
 MP_Allreduce        10249                    271.
 MP_Sync               580
 MP_Alltoall          2083                 578641.
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.028   47.125   47.125
 qs_mol_dyn_low                       1  2.0    0.003    0.008   46.841   46.848
 qs_forces                           11  3.9    0.004    0.008   46.769   46.772
 qs_energies                         11  4.9    0.001    0.002   45.336   45.348
 scf_env_do_scf                      11  5.9    0.000    0.001   39.488   39.488
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   37.525   37.525
 dbcsr_multiply_generic            2286 12.5    0.093    0.098   29.531   29.875
 qs_scf_new_mos                     108  7.5    0.000    0.001   27.526   27.791
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   27.525   27.791
 ot_scf_mini                        108  9.5    0.002    0.002   26.041   26.220
 multiply_cannon                   2286 13.5    0.183    0.188   24.474   25.905
 multiply_cannon_loop              2286 14.5    1.441    1.497   23.806   25.290
 velocity_verlet                     10  3.0    0.001    0.001   22.177   22.178
 ot_mini                            108 10.5    0.001    0.001   16.051   16.300
 qs_ot_get_derivative               108 11.5    0.001    0.001   13.366   13.549
 mp_waitall_1                    245248 16.5    6.511   12.471    6.511   12.471
 multiply_cannon_multrec          54864 15.5    4.479    6.914    7.824   11.504
 multiply_cannon_metrocomm3       54864 15.5    0.070    0.074    4.818   11.488
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.858    7.992
 qs_ks_build_kohn_sham_matrix       119  9.3    0.010    0.012    7.858    7.992
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.930    7.055
 multiply_cannon_sync_h2d         54864 15.5    6.270    6.826    6.270    6.826
 qs_ot_get_p                        119 10.4    0.001    0.001    5.794    6.058
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    4.646    4.742
 mp_sum_l                          7207 12.9    3.180    4.641    3.180    4.641
 init_scf_run                        11  5.9    0.000    0.001    4.611    4.611
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    4.611    4.611
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    4.150    4.577
 dbcsr_mm_accdrv_process          76910 16.1    1.060    1.646    3.268    4.547
 sum_up_and_integrate               119 10.3    0.012    0.015    4.512    4.518
 integrate_v_rspace                 119 11.3    0.002    0.002    4.500    4.507
 qs_rho_update_rho_low              119  7.7    0.000    0.001    4.104    4.177
 calculate_rho_elec                 119  8.7    0.011    0.017    4.103    4.176
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    3.536    3.573
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.848    3.034
 apply_single                       119 13.6    0.000    0.000    2.848    3.034
 jit_kernel_multiply                 13 15.8    2.150    2.863    2.150    2.863
 calculate_dm_sparse                119  9.5    0.000    0.001    2.744    2.847
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.817    2.818
 rs_pw_transfer                     974 11.9    0.011    0.012    2.711    2.817
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.619    2.620
 cp_fm_redistribute_end              50 14.0    2.385    2.599    2.390    2.600
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.573    2.578
 ot_diis_step                       108 11.5    0.006    0.006    2.562    2.563
 cp_fm_diag_elpa_base                50 14.0    0.208    2.516    0.209    2.529
 density_rs2pw                      119  9.7    0.004    0.004    2.216    2.309
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.231    2.233
 grid_integrate_task_list           119 12.3    2.035    2.144    2.035    2.144
 acc_transpose_blocks             54864 15.5    0.212    0.228    1.668    2.077
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.031    2.068
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.993    2.044
 wfi_extrapolate                     11  7.9    0.001    0.001    1.979    1.979
 init_scf_loop                       11  6.9    0.000    0.000    1.944    1.945
 potential_pw2rs                    119 12.3    0.004    0.004    1.852    1.864
 pw_transfer                       1439 11.6    0.053    0.061    1.785    1.861
 multiply_cannon_metrocomm1       54864 15.5    0.054    0.059    1.054    1.807
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.007    1.708    1.787
 make_m2s                          4572 13.5    0.054    0.056    1.564    1.608
 mp_sum_d                          4127 12.0    0.959    1.558    0.959    1.558
 fft3d_ps                          1201 14.6    0.370    0.472    1.479    1.549
 make_images                       4572 14.5    0.132    0.137    1.482    1.525
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.417    1.431
 mp_waitany                       12084 13.8    1.232    1.409    1.232    1.409
 fft_wrap_pw1pw2_140                487 13.2    0.083    0.096    1.317    1.401
 grid_collocate_task_list           119  9.7    1.295    1.356    1.295    1.356
 mp_alltoall_d11v                  2130 13.8    1.157    1.327    1.157    1.327
 acc_transpose_blocks_kernels     54864 16.5    0.234    0.353    0.772    1.038
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.974    0.986
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=47.125000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=431.000000, yerr=1.348400
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             489.213952E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62664.
 MP_Allreduce        10226                    305.
 MP_Sync               104
 MP_Alltoall          2060                1030242.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.032   38.901   38.902
 qs_mol_dyn_low                       1  2.0    0.003    0.004   38.599   38.606
 qs_forces                           11  3.9    0.003    0.007   38.539   38.542
 qs_energies                         11  4.9    0.003    0.011   36.853   36.860
 scf_env_do_scf                      11  5.9    0.001    0.004   31.376   31.378
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   28.791   28.792
 dbcsr_multiply_generic            2286 12.5    0.099    0.102   21.074   21.458
 qs_scf_new_mos                     108  7.5    0.001    0.001   19.706   19.960
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   19.705   19.959
 ot_scf_mini                        108  9.5    0.003    0.004   18.831   19.007
 velocity_verlet                     10  3.0    0.001    0.001   18.065   18.066
 multiply_cannon                   2286 13.5    0.206    0.216   16.175   17.739
 multiply_cannon_loop              2286 14.5    0.902    0.982   15.091   16.523
 ot_mini                            108 10.5    0.001    0.001   11.645   11.884
 mp_waitall_1                    200699 16.5    5.493   10.704    5.493   10.704
 multiply_cannon_metrocomm3       27432 15.5    0.068    0.072    4.100    9.584
 qs_ot_get_derivative               108 11.5    0.001    0.002    9.200    9.380
 multiply_cannon_multrec          27432 15.5    1.963    4.517    5.855    8.600
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.171    7.325
 qs_ks_build_kohn_sham_matrix       119  9.3    0.048    0.062    7.171    7.324
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.351    6.492
 dbcsr_mm_accdrv_process          47894 16.0    2.924    4.957    3.824    5.659
 qs_ot_get_p                        119 10.4    0.001    0.001    4.352    4.583
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.544    4.391
 sum_up_and_integrate               119 10.3    0.024    0.027    4.135    4.141
 integrate_v_rspace                 119 11.3    0.002    0.003    4.111    4.119
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.034    4.106
 apply_single                       119 13.6    0.000    0.000    3.034    4.106
 mp_sum_l                          7207 12.9    2.033    4.009    2.033    4.009
 init_scf_run                        11  5.9    0.000    0.001    3.896    3.897
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.896    3.897
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.747    3.785
 calculate_rho_elec                 119  8.7    0.021    0.024    3.746    3.784
 rs_pw_transfer                     974 11.9    0.010    0.011    2.737    3.257
 qs_ot_p2m_diag                      50 11.0    0.009    0.013    2.910    2.928
 multiply_cannon_sync_h2d         27432 15.5    2.200    2.919    2.200    2.919
 make_m2s                          4572 13.5    0.052    0.053    2.419    2.643
 density_rs2pw                      119  9.7    0.004    0.004    2.086    2.567
 init_scf_loop                       11  6.9    0.006    0.030    2.564    2.566
 make_images                       4572 14.5    0.198    0.234    2.331    2.553
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.477    2.478
 calculate_first_density_matrix       1  7.0    0.001    0.003    2.403    2.404
 ot_diis_step                       108 11.5    0.010    0.011    2.397    2.398
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.114    2.209
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.138    2.139
 cp_fm_redistribute_end              50 14.0    1.771    2.115    1.775    2.117
 calculate_dm_sparse                119  9.5    0.000    0.000    2.012    2.090
 cp_fm_diag_elpa_base                50 14.0    0.327    2.009    0.340    2.058
 pw_transfer                       1439 11.6    0.065    0.071    1.954    1.984
 grid_integrate_task_list           119 12.3    1.833    1.925    1.833    1.925
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.862    1.896
 potential_pw2rs                    119 12.3    0.006    0.006    1.866    1.875
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.857    1.859
 jit_kernel_multiply                  9 16.2    0.846    1.812    0.846    1.812
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.663    1.706
 prepare_preconditioner              11  7.9    0.000    0.001    1.551    1.614
 make_preconditioner                 11  8.9    0.000    0.001    1.551    1.614
 fft3d_ps                          1201 14.6    0.511    0.568    1.564    1.595
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.455    1.549
 make_images_data                  4572 15.5    0.044    0.050    1.110    1.510
 fft_wrap_pw1pw2_140                487 13.2    0.079    0.085    1.466    1.501
 acc_transpose_blocks             27432 15.5    0.108    0.113    1.179    1.482
 wfi_extrapolate                     11  7.9    0.001    0.001    1.443    1.443
 hybrid_alltoall_any               4725 16.4    0.050    0.111    0.975    1.436
 grid_collocate_task_list           119  9.7    1.230    1.363    1.230    1.363
 qs_energies_init_hamiltonians       11  5.9    0.022    0.158    1.308    1.313
 mp_waitany                        5720 13.7    0.745    1.298    0.745    1.298
 rs_pw_transfer_RS2PW_140           130 11.5    0.143    0.185    0.776    1.297
 mp_alltoall_d11v                  2130 13.8    1.157    1.289    1.157    1.289
 mp_allgather_i34                  2286 14.5    0.531    1.275    0.531    1.275
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.243    1.250
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.153    1.198
 mp_sum_d                          4127 12.0    0.710    1.196    0.710    1.196
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.888    0.902
 qs_env_update_s_mstruct             11  6.9    0.001    0.005    0.809    0.891
 acc_transpose_blocks_kernels     27432 16.5    0.182    0.271    0.649    0.854
 dbcsr_dot_sd                      1205 11.9    0.072    0.088    0.449    0.834
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=38.902000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=465.000000, yerr=1.206045
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             521.408512E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62660.
 MP_Allreduce        10225                    303.
 MP_Sync               104
 MP_Alltoall          1821                1587563.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_comm_split          50
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.039    0.052   32.141   32.144
 qs_mol_dyn_low                       1  2.0    0.003    0.004   31.813   31.820
 qs_forces                           11  3.9    0.002    0.006   31.749   31.756
 qs_energies                         11  4.9    0.004    0.008   30.177   30.185
 scf_env_do_scf                      11  5.9    0.001    0.005   25.455   25.455
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.008   22.953   22.953
 dbcsr_multiply_generic            2286 12.5    0.094    0.096   16.232   16.322
 velocity_verlet                     10  3.0    0.001    0.001   15.148   15.149
 qs_scf_new_mos                     108  7.5    0.001    0.001   14.710   14.725
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   14.710   14.724
 ot_scf_mini                        108  9.5    0.003    0.003   13.992   14.008
 multiply_cannon                   2286 13.5    0.194    0.200   13.035   13.773
 multiply_cannon_loop              2286 14.5    0.635    0.660   12.275   13.028
 ot_mini                            108 10.5    0.001    0.001    8.609    8.623
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.118    7.134
 multiply_cannon_multrec          18288 15.5    1.916    2.800    6.795    7.098
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.409    6.423
 qs_ks_build_kohn_sham_matrix       119  9.3    0.014    0.016    6.408    6.422
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.666    5.679
 dbcsr_mm_accdrv_process          38222 16.0    4.077    5.267    4.795    5.559
 sum_up_and_integrate               119 10.3    0.031    0.032    3.986    3.990
 integrate_v_rspace                 119 11.3    0.002    0.004    3.954    3.964
 init_scf_run                        11  5.9    0.000    0.001    3.530    3.530
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.530    3.530
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.478    3.486
 calculate_rho_elec                 119  8.7    0.030    0.031    3.478    3.485
 mp_waitall_1                    158411 16.6    2.570    3.411    2.570    3.411
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.676    3.298
 qs_ot_get_p                        119 10.4    0.001    0.001    3.238    3.261
 rs_pw_transfer                     974 11.9    0.009    0.010    2.271    2.513
 init_scf_loop                       11  6.9    0.001    0.004    2.480    2.482
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.036    2.357
 apply_single                       119 13.6    0.000    0.000    2.035    2.356
 calculate_first_density_matrix       1  7.0    0.001    0.003    2.319    2.320
 density_rs2pw                      119  9.7    0.004    0.004    1.997    2.248
 qs_ot_p2m_diag                      50 11.0    0.012    0.013    2.183    2.189
 jit_kernel_multiply                 11 15.8    0.665    2.110    0.665    2.110
 multiply_cannon_metrocomm3       18288 15.5    0.045    0.047    1.389    2.069
 pw_transfer                       1439 11.6    0.065    0.072    1.974    1.984
 calculate_dm_sparse                119  9.5    0.000    0.000    1.917    1.925
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.924    1.924
 grid_integrate_task_list           119 12.3    1.811    1.901    1.811    1.901
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.880    1.894
 make_m2s                          4572 13.5    0.044    0.045    1.747    1.883
 make_images                       4572 14.5    0.188    0.201    1.662    1.797
 potential_pw2rs                    119 12.3    0.007    0.008    1.751    1.761
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.696    1.697
 prepare_preconditioner              11  7.9    0.000    0.000    1.683    1.686
 make_preconditioner                 11  8.9    0.000    0.001    1.683    1.686
 cp_fm_redistribute_end              50 14.0    1.265    1.676    1.266    1.676
 cp_fm_diag_elpa_base                50 14.0    0.394    1.597    0.408    1.641
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.636    1.638
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.538    1.622
 fft3d_ps                          1201 14.6    0.522    0.537    1.560    1.576
 multiply_cannon_sync_h2d         18288 15.5    1.402    1.572    1.402    1.572
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.528    1.535
 fft_wrap_pw1pw2_140                487 13.2    0.090    0.093    1.506    1.519
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.494    1.500
 ot_diis_step                       108 11.5    0.011    0.011    1.472    1.472
 mp_sum_l                          7207 12.9    1.069    1.406    1.069    1.406
 grid_collocate_task_list           119  9.7    1.214    1.363    1.214    1.363
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.291    1.295
 acc_transpose_blocks             18288 15.5    0.076    0.078    1.255    1.273
 wfi_extrapolate                     11  7.9    0.001    0.001    1.162    1.162
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    0.958    0.966
 make_images_data                  4572 15.5    0.044    0.048    0.789    0.935
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.868    0.889
 hybrid_alltoall_any               4725 16.4    0.055    0.113    0.665    0.847
 acc_transpose_blocks_kernels     18288 16.5    0.210    0.217    0.816    0.826
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.813    0.815
 mp_alltoall_d11v                  2130 13.8    0.642    0.801    0.642    0.801
 mp_waitany                        9880 13.7    0.521    0.789    0.521    0.789
 rs_pw_transfer_RS2PW_140           130 11.5    0.118    0.121    0.509    0.755
 mp_alltoall_z22v                  1201 16.6    0.642    0.754    0.642    0.754
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.662    0.718
 cp_fm_cholesky_invert               11 10.9    0.710    0.714    0.710    0.714
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=32.144000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=496.909091, yerr=1.831767
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             549.396480E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62659.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_comm_split          50
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.025    0.048   34.869   34.869
 qs_mol_dyn_low                       1  2.0    0.003    0.013   34.640   34.647
 qs_forces                           11  3.9    0.001    0.002   34.571   34.573
 qs_energies                         11  4.9    0.001    0.001   32.847   32.853
 scf_env_do_scf                      11  5.9    0.000    0.001   27.909   27.910
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   24.481   24.481
 dbcsr_multiply_generic            2286 12.5    0.099    0.102   17.825   17.929
 velocity_verlet                     10  3.0    0.001    0.001   17.781   17.783
 qs_scf_new_mos                     108  7.5    0.001    0.001   15.943   15.996
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   15.942   15.996
 ot_scf_mini                        108  9.5    0.002    0.003   15.010   15.057
 multiply_cannon                   2286 13.5    0.228    0.281   14.235   14.739
 multiply_cannon_loop              2286 14.5    0.935    0.965   13.306   13.698
 ot_mini                            108 10.5    0.001    0.001    9.101    9.162
 multiply_cannon_multrec          27432 15.5    2.343    3.028    8.581    8.960
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.296    7.343
 dbcsr_mm_accdrv_process          47916 15.9    5.158    6.916    6.146    7.314
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.624    6.675
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.014    6.623    6.675
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.880    5.925
 sum_up_and_integrate               119 10.3    0.036    0.039    3.847    3.855
 integrate_v_rspace                 119 11.3    0.002    0.003    3.811    3.820
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.593    3.628
 calculate_rho_elec                 119  8.7    0.040    0.046    3.592    3.627
 init_scf_run                        11  5.9    0.000    0.001    3.608    3.608
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.607    3.608
 qs_ot_get_p                        119 10.4    0.001    0.001    3.341    3.416
 init_scf_loop                       11  6.9    0.000    0.000    3.408    3.409
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.637    3.125
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.125    2.583
 apply_single                       119 13.6    0.000    0.000    2.125    2.583
 prepare_preconditioner              11  7.9    0.000    0.000    2.537    2.544
 make_preconditioner                 11  8.9    0.000    0.000    2.537    2.544
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.148    2.470
 make_m2s                          4572 13.5    0.054    0.055    2.144    2.248
 mp_waitall_1                    137007 16.6    1.645    2.246    1.645    2.246
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.227    2.229
 rs_pw_transfer                     974 11.9    0.009    0.010    2.003    2.193
 density_rs2pw                      119  9.7    0.004    0.004    2.003    2.182
 pw_transfer                       1439 11.6    0.066    0.072    2.120    2.160
 make_images                       4572 14.5    0.268    0.329    2.039    2.142
 calculate_dm_sparse                119  9.5    0.000    0.000    2.023    2.076
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.027    2.072
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    2.052    2.060
 grid_integrate_task_list           119 12.3    1.836    1.921    1.836    1.921
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.892    1.917
 jit_kernel_multiply                 10 15.9    0.927    1.815    0.927    1.815
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.782    1.783
 ot_diis_step                       108 11.5    0.012    0.012    1.765    1.766
 fft3d_ps                          1201 14.6    0.558    0.610    1.702    1.740
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.737    1.737
 fft_wrap_pw1pw2_140                487 13.2    0.088    0.094    1.673    1.724
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.609    1.623
 potential_pw2rs                    119 12.3    0.008    0.010    1.595    1.600
 mp_sum_l                          7207 12.9    1.018    1.566    1.018    1.566
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.493    1.494
 cp_fm_redistribute_end              50 14.0    0.989    1.469    0.990    1.470
 acc_transpose_blocks             27432 15.5    0.111    0.114    1.442    1.463
 cp_fm_diag_elpa_base                50 14.0    0.458    1.403    0.478    1.445
 grid_collocate_task_list           119  9.7    1.222    1.356    1.222    1.356
 wfi_extrapolate                     11  7.9    0.001    0.001    1.331    1.331
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.211    1.221
 multiply_cannon_metrocomm3       27432 15.5    0.037    0.039    0.685    1.187
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.087    1.105
 cp_fm_upper_to_full                 72 13.5    0.790    1.096    0.790    1.096
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.078    1.080
 multiply_cannon_sync_h2d         27432 15.5    1.002    1.060    1.002    1.060
 dbcsr_complete_redistribute        329 12.2    0.120    0.149    0.775    1.050
 make_images_data                  4572 15.5    0.045    0.049    0.835    0.949
 hybrid_alltoall_any               4725 16.4    0.061    0.150    0.729    0.880
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.794    0.871
 mp_alltoall_d11v                  2130 13.8    0.726    0.868    0.726    0.868
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.833    0.838
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.565    0.830
 acc_transpose_blocks_kernels     27432 16.5    0.268    0.275    0.821    0.829
 mp_alltoall_z22v                  1201 16.6    0.728    0.768    0.728    0.768
 cp_fm_cholesky_invert               11 10.9    0.751    0.753    0.751    0.753
 mp_alltoall_i22                    627 13.8    0.421    0.712    0.421    0.712
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=34.869000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=522.181818, yerr=2.036851
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             600.371200E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10224                    344.
 MP_Sync               104
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_comm_split          50
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.028   28.300   28.301
 qs_mol_dyn_low                       1  2.0    0.003    0.004   28.134   28.141
 qs_forces                           11  3.9    0.002    0.002   28.067   28.072
 qs_energies                         11  4.9    0.001    0.001   26.346   26.353
 scf_env_do_scf                      11  5.9    0.000    0.001   21.709   21.709
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   19.091   19.093
 velocity_verlet                     10  3.0    0.001    0.001   14.525   14.528
 dbcsr_multiply_generic            2286 12.5    0.092    0.094   12.041   12.164
 qs_scf_new_mos                     108  7.5    0.001    0.001   11.028   11.048
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   11.028   11.048
 ot_scf_mini                        108  9.5    0.002    0.002   10.362   10.390
 multiply_cannon                   2286 13.5    0.233    0.245    9.541    9.858
 multiply_cannon_loop              2286 14.5    0.329    0.341    8.622    8.844
 multiply_cannon_multrec           9144 15.5    1.623    2.047    5.765    6.151
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.075    6.094
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.014    6.075    6.094
 ot_mini                            108 10.5    0.001    0.001    5.802    5.835
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.418    5.436
 qs_ot_get_derivative               108 11.5    0.001    0.001    4.509    4.536
 dbcsr_mm_accdrv_process          12550 15.8    3.103    3.727    4.042    4.121
 sum_up_and_integrate               119 10.3    0.038    0.041    3.686    3.690
 integrate_v_rspace                 119 11.3    0.003    0.003    3.648    3.653
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.578    3.583
 calculate_rho_elec                 119  8.7    0.060    0.061    3.578    3.583
 init_scf_run                        11  5.9    0.000    0.001    3.191    3.191
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.191    3.191
 qs_ot_get_p                        119 10.4    0.001    0.001    2.836    2.870
 init_scf_loop                       11  6.9    0.000    0.000    2.596    2.598
 pw_transfer                       1439 11.6    0.066    0.070    2.170    2.181
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.076    2.089
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.048    2.049
 density_rs2pw                      119  9.7    0.004    0.004    1.905    2.046
 mp_waitall_1                    115863 16.7    1.425    1.942    1.425    1.942
 grid_integrate_task_list           119 12.3    1.849    1.917    1.849    1.917
 make_m2s                          4572 13.5    0.034    0.035    1.710    1.885
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    1.878    1.881
 rs_pw_transfer                     974 11.9    0.008    0.008    1.681    1.821
 make_images                       4572 14.5    0.266    0.299    1.621    1.794
 prepare_preconditioner              11  7.9    0.000    0.000    1.757    1.760
 make_preconditioner                 11  8.9    0.000    0.000    1.757    1.760
 fft3d_ps                          1201 14.6    0.562    0.573    1.735    1.746
 fft_wrap_pw1pw2_140                487 13.2    0.087    0.090    1.712    1.724
 calculate_dm_sparse                119  9.5    0.000    0.000    1.688    1.707
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.686    1.686
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.639    1.674
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.528    1.540
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.506    1.508
 potential_pw2rs                    119 12.3    0.010    0.010    1.423    1.425
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.403    1.403
 grid_collocate_task_list           119  9.7    1.270    1.389    1.270    1.389
 cp_fm_redistribute_end              50 14.0    0.699    1.380    0.700    1.380
 cp_fm_diag_elpa_base                50 14.0    0.636    1.311    0.679    1.366
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.350    1.356
 ot_diis_step                       108 11.5    0.012    0.012    1.280    1.280
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.235    1.240
 jit_kernel_multiply                  7 15.9    0.901    1.218    0.901    1.218
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.154    1.173
 apply_single                       119 13.6    0.000    0.000    1.154    1.173
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.120    1.135
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.117    1.123
 wfi_extrapolate                     11  7.9    0.001    0.001    1.094    1.094
 hybrid_alltoall_any               4725 16.4    0.062    0.176    0.757    1.021
 make_images_data                  4572 15.5    0.038    0.042    0.793    1.003
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.868    0.921
 acc_transpose_blocks              9144 15.5    0.038    0.038    0.886    0.895
 cp_fm_cholesky_invert               11 10.9    0.875    0.877    0.875    0.877
 mp_alltoall_d11v                  2130 13.8    0.769    0.870    0.769    0.870
 multiply_cannon_sync_h2d          9144 15.5    0.710    0.792    0.710    0.792
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.780    0.783
 multiply_cannon_metrocomm3        9144 15.5    0.019    0.019    0.380    0.776
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    0.688    0.744
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.718    0.727
 mp_alltoall_z22v                  1201 16.6    0.621    0.677    0.621    0.677
 mp_allgather_i34                  2286 14.5    0.256    0.657    0.256    0.657
 acc_transpose_blocks_kernels      9144 16.5    0.117    0.121    0.647    0.650
 yz_to_x                            606 15.1    0.265    0.275    0.589    0.604
 mp_waitany                        5200 13.7    0.457    0.589    0.457    0.589
 x_to_yz                            595 16.2    0.275    0.288    0.573    0.580
 qs_create_task_list                 11  7.9    0.000    0.000    0.550    0.574
 generate_qs_task_list               11  8.9    0.189    0.211    0.549    0.574
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=28.301000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=570.363636, yerr=4.183794
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             763.203584E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63729.
 MP_Allreduce        10074                    433.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.014    0.035   41.387   41.388
 qs_mol_dyn_low                       1  2.0    0.003    0.004   41.183   41.196
 qs_forces                           11  3.9    0.001    0.002   41.124   41.125
 qs_energies                         11  4.9    0.001    0.001   39.146   39.150
 scf_env_do_scf                      11  5.9    0.001    0.001   33.365   33.365
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.006   25.540   25.542
 velocity_verlet                     10  3.0    0.001    0.001   23.382   23.387
 dbcsr_multiply_generic            2286 12.5    0.099    0.100   17.300   17.454
 qs_scf_new_mos                     108  7.5    0.001    0.001   15.695   15.792
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   15.694   15.791
 ot_scf_mini                        108  9.5    0.002    0.002   14.623   14.725
 multiply_cannon                   2286 13.5    0.304    0.313   13.487   14.392
 multiply_cannon_loop              2286 14.5    0.344    0.351   12.249   13.155
 ot_mini                            108 10.5    0.001    0.001    8.699    8.818
 multiply_cannon_multrec           9144 15.5    3.365    4.791    8.520    8.635
 init_scf_loop                       11  6.9    0.000    0.000    7.797    7.802
 rebuild_ks_matrix                  119  8.3    0.000    0.001    7.348    7.494
 qs_ks_build_kohn_sham_matrix       119  9.3    0.014    0.014    7.348    7.493
 prepare_preconditioner              11  7.9    0.000    0.000    6.819    6.833
 make_preconditioner                 11  8.9    0.000    0.000    6.819    6.833
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.644    6.775
 qs_ot_get_derivative               108 11.5    0.001    0.001    6.661    6.762
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.374    6.711
 dbcsr_mm_accdrv_process          12550 15.8    4.043    5.295    5.032    6.377
 cp_fm_upper_to_full                 72 14.2    3.187    4.573    3.187    4.573
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.280    4.285
 calculate_rho_elec                 119  8.7    0.118    0.121    4.280    4.285
 sum_up_and_integrate               119 10.3    0.066    0.067    4.032    4.038
 integrate_v_rspace                 119 11.3    0.003    0.003    3.966    3.974
 init_scf_run                        11  5.9    0.000    0.001    3.754    3.754
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.753    3.753
 qs_ot_get_p                        119 10.4    0.001    0.001    3.198    3.336
 mp_waitall_1                     94719 16.7    2.334    3.213    2.334    3.213
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.443    2.848
 dbcsr_complete_redistribute        329 12.2    0.306    0.312    1.998    2.825
 pw_transfer                       1439 11.6    0.069    0.069    2.812    2.820
 fft_wrap_pw1pw2                   1201 12.6    0.009    0.009    2.713    2.720
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.689    2.515
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.169    2.442
 apply_single                       119 13.6    0.000    0.000    2.168    2.441
 make_m2s                          4572 13.5    0.038    0.038    2.270    2.431
 mp_alltoall_i22                    627 13.8    1.501    2.375    1.501    2.375
 fft3d_ps                          1201 14.6    0.593    0.601    2.337    2.345
 make_images                       4572 14.5    0.348    0.378    2.149    2.309
 fft_wrap_pw1pw2_140                487 13.2    0.095    0.096    2.295    2.304
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.302    2.303
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.440    2.260
 density_rs2pw                      119  9.7    0.004    0.004    2.197    2.221
 calculate_dm_sparse                119  9.5    0.000    0.000    2.130    2.148
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.020    1.284    2.108
 grid_integrate_task_list           119 12.3    2.070    2.080    2.070    2.080
 ot_diis_step                       108 11.5    0.014    0.014    2.012    2.012
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.998    1.999
 qs_ot_p2m_diag                      50 11.0    0.042    0.043    1.948    1.950
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.765    1.766
 mp_sum_l                          7207 12.9    1.009    1.758    1.009    1.758
 jit_kernel_multiply                 10 15.8    0.962    1.750    0.962    1.750
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.666    1.716
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.645    1.690
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.678    1.678
 grid_collocate_task_list           119  9.7    1.496    1.511    1.496    1.511
 rs_pw_transfer                     974 11.9    0.009    0.009    1.427    1.457
 cp_fm_cholesky_invert               11 10.9    1.437    1.440    1.437    1.440
 potential_pw2rs                    119 12.3    0.014    0.015    1.426    1.429
 hybrid_alltoall_any               4725 16.4    0.087    0.147    1.152    1.395
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.386    1.386
 cp_fm_diag_elpa_base                50 14.0    1.243    1.294    1.384    1.384
 wfi_extrapolate                     11  7.9    0.001    0.001    1.384    1.384
 make_images_data                  4572 15.5    0.043    0.046    1.100    1.301
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.206    1.214
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.123    1.143
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    1.101    1.121
 mp_alltoall_d11v                  2130 13.8    1.059    1.079    1.059    1.079
 multiply_cannon_sync_h2d          9144 15.5    1.044    1.047    1.044    1.047
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.982    1.021
 qs_create_task_list                 11  7.9    0.000    0.001    0.934    0.948
 generate_qs_task_list               11  8.9    0.370    0.389    0.933    0.947
 yz_to_x                            606 15.1    0.459    0.470    0.929    0.935
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.916    0.929
 acc_transpose_blocks              9144 15.5    0.038    0.038    0.906    0.925
 copy_dbcsr_to_fm                   153 11.3    0.003    0.003    0.779    0.846
 mp_alltoall_z22v                  1201 16.6    0.793    0.828    0.793    0.828
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=41.388000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=716.636364, yerr=15.101311
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             501.690368E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65372.
 MP_Allreduce         9840                    486.
 MP_Sync               100
 MP_Alltoall          1938                1379060.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_comm_split          48
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.010    0.027   81.095   81.096
 qs_mol_dyn_low                       1  2.0    0.003    0.003   80.757   80.766
 qs_forces                           11  3.9    0.002    0.003   80.691   80.692
 qs_energies                         11  4.9    0.001    0.002   77.842   77.851
 scf_env_do_scf                      11  5.9    0.000    0.001   68.967   68.970
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   63.502   63.502
 dbcsr_multiply_generic            2055 12.4    0.114    0.121   50.335   50.661
 qs_scf_new_mos                      99  7.5    0.000    0.001   46.082   46.203
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   46.081   46.202
 ot_scf_mini                         99  9.5    0.002    0.002   43.720   43.842
 multiply_cannon                   2055 13.4    0.176    0.180   41.632   42.600
 multiply_cannon_loop              2055 14.4    1.512    1.546   40.703   41.684
 velocity_verlet                     10  3.0    0.001    0.001   40.463   40.464
 ot_mini                             99 10.5    0.001    0.001   26.102   26.211
 qs_ot_get_derivative                99 11.5    0.001    0.001   19.269   19.382
 multiply_cannon_multrec          49320 15.4   12.458   13.135   17.370   18.042
 rebuild_ks_matrix                  110  8.3    0.000    0.000   14.602   14.749
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.012   14.601   14.748
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.810   12.948
 mp_waitall_1                    220248 16.4   10.168   11.272   10.168   11.272
 multiply_cannon_sync_h2d         49320 15.4   10.307   10.832   10.307   10.832
 qs_ot_get_p                        110 10.4    0.001    0.001    9.289    9.412
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.186    7.805
 apply_single                       110 13.6    0.000    0.000    7.185    7.805
 multiply_cannon_metrocomm3       49320 15.4    0.079    0.083    6.166    7.571
 sum_up_and_integrate               110 10.3    0.036    0.042    7.093    7.107
 integrate_v_rspace                 110 11.3    0.003    0.003    7.057    7.078
 init_scf_run                        11  5.9    0.000    0.001    6.813    6.813
 scf_env_initial_rho_setup           11  6.9    0.002    0.014    6.813    6.813
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.624    6.748
 calculate_rho_elec                 110  8.6    0.021    0.026    6.624    6.747
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    6.117    6.698
 ot_diis_step                        99 11.5    0.005    0.006    6.560    6.560
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    6.458    6.534
 qs_ot_p2m_diag                      48 11.0    0.012    0.019    6.314    6.358
 init_scf_loop                       11  6.9    0.000    0.000    5.438    5.438
 cp_dbcsr_syevd                      48 12.0    0.002    0.003    5.302    5.303
 mp_sum_l                          6514 12.8    4.413    5.271    4.413    5.271
 dbcsr_mm_accdrv_process          87628 16.1    1.942    2.035    4.789    5.073
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    4.828    4.828
 cp_fm_redistribute_end              48 14.0    4.213    4.802    4.217    4.803
 cp_fm_diag_elpa_base                48 14.0    0.580    4.660    0.584    4.682
 rs_pw_transfer                     902 11.9    0.012    0.013    3.657    4.187
 wfi_extrapolate                     11  7.9    0.001    0.001    4.029    4.029
 density_rs2pw                      110  9.6    0.004    0.005    3.420    3.978
 make_m2s                          4110 13.4    0.061    0.065    3.821    3.937
 calculate_dm_sparse                110  9.5    0.001    0.001    3.728    3.843
 make_images                       4110 14.4    0.176    0.191    3.726    3.843
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    3.562    3.566
 grid_integrate_task_list           110 12.3    3.244    3.423    3.244    3.423
 multiply_cannon_metrocomm1       49320 15.4    0.060    0.064    2.190    3.381
 prepare_preconditioner              11  7.9    0.000    0.000    3.289    3.311
 make_preconditioner                 11  8.9    0.000    0.000    3.289    3.311
 pw_transfer                       1331 11.6    0.054    0.063    3.225    3.296
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.229    3.266
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.137    3.211
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.081    3.127
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    3.049    3.086
 fft_wrap_pw1pw2_140                451 13.1    0.172    0.193    2.677    2.759
 fft3d_ps                          1111 14.6    0.777    0.885    2.665    2.722
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.688    2.700
 potential_pw2rs                    110 12.3    0.006    0.007    2.635    2.657
 jit_kernel_multiply                 13 15.9    2.572    2.591    2.572    2.591
 mp_waitany                       14300 13.8    1.809    2.479    1.809    2.479
 mp_alltoall_d11v                  2046 13.8    2.051    2.464    2.051    2.464
 grid_collocate_task_list           110  9.6    2.085    2.323    2.085    2.323
 acc_transpose_blocks             49320 15.4    0.210    0.219    2.077    2.143
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.932    1.951
 mp_sum_d                          3881 11.9    1.403    1.942    1.403    1.942
 make_images_data                  4110 15.4    0.042    0.045    1.743    1.860
 cp_fm_cholesky_invert               11 10.9    1.800    1.804    1.800    1.804
 hybrid_alltoall_any               4261 16.3    0.082    0.478    1.536    1.802
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.636    1.660
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=81.096000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=476.818182, yerr=2.479669
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             588.472320E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65587.
 MP_Allreduce         9839                    562.
 MP_Sync               100
 MP_Alltoall          1717                2462973.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_comm_split          48
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.029   68.777   68.777
 qs_mol_dyn_low                       1  2.0    0.003    0.004   68.469   68.478
 qs_forces                           11  3.9    0.002    0.002   68.402   68.403
 qs_energies                         11  4.9    0.001    0.002   65.032   65.035
 scf_env_do_scf                      11  5.9    0.000    0.001   56.491   56.494
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   48.927   48.928
 dbcsr_multiply_generic            2055 12.4    0.113    0.117   37.454   37.655
 velocity_verlet                     10  3.0    0.001    0.001   36.173   36.174
 qs_scf_new_mos                      99  7.5    0.001    0.001   32.512   32.624
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   32.511   32.623
 multiply_cannon                   2055 13.4    0.228    0.256   30.865   32.097
 ot_scf_mini                         99  9.5    0.003    0.003   30.858   30.975
 multiply_cannon_loop              2055 14.4    0.926    0.957   29.623   30.446
 ot_mini                             99 10.5    0.001    0.001   18.093   18.215
 multiply_cannon_multrec          24660 15.4    7.602    9.210   13.786   15.400
 rebuild_ks_matrix                  110  8.3    0.000    0.000   13.863   13.990
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.014   13.863   13.989
 qs_ot_get_derivative                99 11.5    0.001    0.001   12.274   12.392
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.187   12.308
 mp_waitall_1                    176588 16.5    7.602   10.108    7.602   10.108
 multiply_cannon_sync_h2d         24660 15.4    7.059    8.206    7.059    8.206
 multiply_cannon_metrocomm3       24660 15.4    0.071    0.074    5.141    7.856
 init_scf_loop                       11  6.9    0.000    0.000    7.526    7.527
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.512    7.183
 apply_single                       110 13.6    0.000    0.001    6.512    7.182
 sum_up_and_integrate               110 10.3    0.052    0.060    6.662    6.673
 integrate_v_rspace                 110 11.3    0.002    0.003    6.610    6.623
 qs_ot_get_p                        110 10.4    0.001    0.001    6.173    6.353
 dbcsr_mm_accdrv_process          52282 16.1    4.576    5.679    6.025    6.297
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.210    6.221
 calculate_rho_elec                 110  8.6    0.039    0.047    6.210    6.220
 init_scf_run                        11  5.9    0.000    0.001    6.194    6.194
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    6.193    6.194
 ot_diis_step                        99 11.5    0.010    0.010    5.772    5.772
 prepare_preconditioner              11  7.9    0.000    0.000    5.498    5.515
 make_preconditioner                 11  8.9    0.000    0.000    5.498    5.515
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    4.618    5.348
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.093    5.244
 make_m2s                          4110 13.4    0.057    0.059    4.204    4.646
 make_images                       4110 14.4    0.396    0.441    4.095    4.532
 qs_ot_p2m_diag                      48 11.0    0.028    0.044    4.331    4.352
 pw_transfer                       1331 11.6    0.066    0.073    3.783    3.922
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.906    3.906
 density_rs2pw                      110  9.6    0.004    0.005    3.336    3.885
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.676    3.818
 rs_pw_transfer                     902 11.9    0.012    0.014    2.947    3.524
 wfi_extrapolate                     11  7.9    0.001    0.001    3.504    3.504
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.370    3.372
 cp_fm_redistribute_end              48 14.0    2.520    3.345    2.522    3.345
 grid_integrate_task_list           110 12.3    3.148    3.313    3.148    3.313
 cp_fm_diag_elpa_base                48 14.0    0.788    3.204    0.819    3.286
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.283    3.284
 fft_wrap_pw1pw2_140                451 13.1    0.203    0.220    3.125    3.269
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.157    3.216
 fft3d_ps                          1111 14.6    1.101    1.315    3.027    3.164
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.952    2.986
 calculate_dm_sparse                110  9.5    0.001    0.001    2.948    2.975
 make_images_data                  4110 15.4    0.046    0.050    2.370    2.854
 hybrid_alltoall_any               4261 16.3    0.102    0.445    2.069    2.780
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.600    2.603
 cp_fm_cholesky_invert               11 10.9    2.561    2.568    2.561    2.568
 grid_collocate_task_list           110  9.6    2.045    2.494    2.045    2.494
 potential_pw2rs                    110 12.3    0.008    0.008    2.463    2.476
 mp_sum_l                          6514 12.8    1.758    2.465    1.758    2.465
 jit_kernel_multiply                 13 16.3    1.098    2.451    1.098    2.451
 mp_alltoall_d11v                  2046 13.8    1.741    1.992    1.741    1.992
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.937    1.956
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.850    1.851
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.840    1.850
 mp_waitany                       10164 13.8    1.220    1.769    1.220    1.769
 mp_allgather_i34                  2055 14.4    0.571    1.579    0.571    1.579
 multiply_cannon_metrocomm4       22605 15.4    0.075    0.079    0.780    1.575
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.527    1.539
 rs_pw_transfer_RS2PW_140           121 11.5    0.205    0.216    0.962    1.532
 acc_transpose_blocks             24660 15.4    0.111    0.115    1.508    1.528
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.373    1.482
 mp_irecv_dv                      57340 16.2    0.656    1.462    0.656    1.462
 dbcsr_complete_redistribute        325 12.2    0.246    0.311    1.115    1.381
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=68.777000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=555.363636, yerr=8.059695
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             660.332544E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65578.
 MP_Allreduce         9838                    559.
 MP_Sync               100
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_comm_split          48
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.029   59.820   59.821
 qs_mol_dyn_low                       1  2.0    0.003    0.004   59.456   59.465
 qs_forces                           11  3.9    0.002    0.002   59.392   59.393
 qs_energies                         11  4.9    0.001    0.001   56.192   56.194
 scf_env_do_scf                      11  5.9    0.000    0.001   48.303   48.303
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   39.902   39.903
 velocity_verlet                     10  3.0    0.001    0.001   32.442   32.444
 dbcsr_multiply_generic            2055 12.4    0.107    0.110   28.113   28.352
 qs_scf_new_mos                      99  7.5    0.001    0.001   24.769   24.868
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   24.769   24.867
 ot_scf_mini                         99  9.5    0.002    0.003   23.553   23.667
 multiply_cannon                   2055 13.4    0.210    0.221   21.926   23.086
 multiply_cannon_loop              2055 14.4    0.613    0.627   20.782   21.931
 ot_mini                             99 10.5    0.001    0.001   13.429   13.547
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.441   12.573
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.014   12.440   12.572
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.961   11.081
 multiply_cannon_multrec          16440 15.4    3.861    4.847    9.592   10.464
 mp_waitall_1                    139946 16.5    6.962   10.272    6.962   10.272
 qs_ot_get_derivative                99 11.5    0.001    0.001    8.946    9.062
 init_scf_loop                       11  6.9    0.000    0.000    8.365    8.367
 multiply_cannon_metrocomm3       16440 15.4    0.043    0.045    4.329    7.505
 prepare_preconditioner              11  7.9    0.000    0.000    6.612    6.629
 make_preconditioner                 11  8.9    0.000    0.000    6.612    6.629
 sum_up_and_integrate               110 10.3    0.060    0.060    6.605    6.620
 integrate_v_rspace                 110 11.3    0.003    0.003    6.545    6.561
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.950    6.305
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.007    6.018
 calculate_rho_elec                 110  8.6    0.058    0.059    6.007    6.017
 dbcsr_mm_accdrv_process          34862 16.1    4.530    5.197    5.584    5.764
 qs_ot_get_p                        110 10.4    0.001    0.001    5.367    5.515
 init_scf_run                        11  5.9    0.000    0.001    5.495    5.495
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.494    5.495
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    5.010    5.437
 apply_single                       110 13.6    0.000    0.000    5.010    5.437
 make_m2s                          4110 13.4    0.050    0.051    4.110    4.458
 ot_diis_step                        99 11.5    0.010    0.011    4.454    4.454
 density_rs2pw                      110  9.6    0.004    0.005    3.120    4.392
 multiply_cannon_sync_h2d         16440 15.4    3.674    4.380    3.674    4.380
 make_images                       4110 14.4    0.390    0.510    3.995    4.343
 rs_pw_transfer                     902 11.9    0.010    0.012    2.684    3.932
 qs_ot_p2m_diag                      48 11.0    0.041    0.044    3.782    3.787
 pw_transfer                       1331 11.6    0.065    0.072    3.720    3.727
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.053    3.704
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.613    3.624
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.448    3.448
 grid_integrate_task_list           110 12.3    3.194    3.382    3.194    3.382
 fft_wrap_pw1pw2_140                451 13.1    0.213    0.218    3.126    3.140
 wfi_extrapolate                     11  7.9    0.001    0.001    2.957    2.957
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.927    2.928
 fft3d_ps                          1111 14.6    1.085    1.097    2.911    2.918
 cp_fm_redistribute_end              48 14.0    1.827    2.903    1.829    2.903
 cp_fm_diag_elpa_base                48 14.0    1.012    2.752    1.069    2.864
 make_images_data                  4110 15.4    0.043    0.048    2.409    2.838
 hybrid_alltoall_any               4261 16.3    0.105    0.374    2.122    2.781
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.732    2.733
 cp_fm_cholesky_invert               11 10.9    2.597    2.603    2.597    2.603
 calculate_dm_sparse                110  9.5    0.001    0.001    2.488    2.515
 mp_waitany                       17072 13.8    1.194    2.512    1.194    2.512
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.452    2.453
 grid_collocate_task_list           110  9.6    2.080    2.448    2.080    2.448
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.400    2.445
 multiply_cannon_metrocomm4       14385 15.4    0.045    0.049    0.887    2.385
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.290    2.355
 potential_pw2rs                    110 12.3    0.010    0.010    2.326    2.337
 mp_irecv_dv                      48980 15.7    0.818    2.265    0.818    2.265
 mp_alltoall_d11v                  2046 13.8    1.717    2.170    1.717    2.170
 rs_pw_transfer_RS2PW_140           121 11.5    0.176    0.180    0.896    2.140
 mp_sum_l                          6514 12.8    1.459    2.112    1.459    2.112
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.965    1.966
 dbcsr_complete_redistribute        325 12.2    0.339    0.362    1.415    1.865
 cp_fm_upper_to_full                 70 13.6    1.374    1.834    1.374    1.834
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.678    1.692
 cp_fm_cholesky_decompose            22 10.9    1.548    1.565    1.548    1.565
 mp_allgather_i34                  2055 14.4    0.473    1.528    0.473    1.528
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.457    1.469
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.360    1.466
 jit_kernel_multiply                  8 16.4    0.666    1.465    0.666    1.465
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    0.962    1.398
 rs_gather_matrices                 110 12.3    0.233    0.262    0.946    1.349
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.243    1.249
 acc_transpose_blocks             16440 15.4    0.071    0.074    1.195    1.210
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=59.821000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=624.090909, yerr=9.219096
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             737.423360E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65576.
 MP_Allreduce         9838                    600.
 MP_Sync               100
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_comm_split          48
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.038    0.069   64.828   64.829
 qs_mol_dyn_low                       1  2.0    0.003    0.004   64.505   64.514
 qs_forces                           11  3.9    0.002    0.007   64.441   64.442
 qs_energies                         11  4.9    0.002    0.007   61.014   61.018
 scf_env_do_scf                      11  5.9    0.001    0.002   52.666   52.668
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   41.100   41.101
 velocity_verlet                     10  3.0    0.001    0.001   36.786   36.788
 dbcsr_multiply_generic            2055 12.4    0.115    0.119   29.186   29.345
 qs_scf_new_mos                      99  7.5    0.001    0.001   25.951   26.055
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   25.950   26.055
 ot_scf_mini                         99  9.5    0.003    0.003   24.336   24.429
 multiply_cannon                   2055 13.4    0.242    0.267   22.328   23.288
 multiply_cannon_loop              2055 14.4    0.884    0.907   20.913   21.368
 ot_mini                             99 10.5    0.001    0.001   13.846   13.957
 multiply_cannon_multrec          24660 15.4    4.242    6.829   12.636   13.863
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.266   12.398
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.016   12.265   12.398
 init_scf_loop                       11  6.9    0.005    0.037   11.525   11.526
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.836   10.950
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.721    9.812
 prepare_preconditioner              11  7.9    0.000    0.000    9.737    9.752
 make_preconditioner                 11  8.9    0.000    0.001    9.737    9.752
 make_full_inverse_cholesky          11  9.9    0.000    0.000    7.975    9.407
 dbcsr_mm_accdrv_process          52304 16.0    6.719    8.505    8.247    9.192
 sum_up_and_integrate               110 10.3    0.068    0.071    6.547    6.561
 integrate_v_rspace                 110 11.3    0.003    0.006    6.479    6.491
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.176    6.185
 calculate_rho_elec                 110  8.6    0.077    0.081    6.175    6.184
 mp_waitall_1                    121746 16.5    4.151    6.003    4.151    6.003
 make_m2s                          4110 13.4    0.060    0.062    5.279    5.664
 qs_ot_get_p                        110 10.4    0.001    0.001    5.500    5.631
 init_scf_run                        11  5.9    0.000    0.001    5.531    5.532
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.531    5.532
 make_images                       4110 14.4    0.573    0.699    5.139    5.521
 cp_fm_upper_to_full                 70 13.8    3.235    4.601    3.235    4.601
 ot_diis_step                        99 11.5    0.011    0.011    4.090    4.091
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.018    4.082
 apply_single                       110 13.6    0.000    0.000    4.018    4.082
 pw_transfer                       1331 11.6    0.065    0.074    3.910    3.937
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.804    3.835
 dbcsr_complete_redistribute        325 12.2    0.423    0.475    2.671    3.782
 qs_ot_p2m_diag                      48 11.0    0.055    0.064    3.762    3.775
 density_rs2pw                      110  9.6    0.005    0.006    3.101    3.614
 grid_integrate_task_list           110 12.3    3.255    3.499    3.255    3.499
 multiply_cannon_sync_h2d         24660 15.4    3.226    3.466    3.226    3.466
 fft_wrap_pw1pw2_140                451 13.1    0.204    0.215    3.280    3.314
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.243    3.285
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.276    3.277
 copy_fm_to_dbcsr                   174 11.2    0.001    0.002    2.147    3.245
 multiply_cannon_metrocomm3       24660 15.4    0.035    0.036    1.419    3.176
 fft3d_ps                          1111 14.6    1.089    1.121    3.097    3.127
 hybrid_alltoall_any               4261 16.3    0.119    0.459    2.279    3.058
 make_images_data                  4110 15.4    0.046    0.050    2.659    3.049
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.962    3.019
 wfi_extrapolate                     11  7.9    0.001    0.001    2.982    2.982
 calculate_dm_sparse                110  9.5    0.001    0.001    2.904    2.930
 rs_pw_transfer                     902 11.9    0.010    0.012    2.426    2.885
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.753    2.835
 mp_alltoall_i22                    605 13.7    1.654    2.814    1.654    2.814
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.802    2.804
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.748    2.750
 cp_fm_redistribute_end              48 14.0    1.376    2.723    1.377    2.724
 cp_fm_diag_elpa_base                48 14.0    1.267    2.591    1.344    2.697
 cp_fm_cholesky_invert               11 10.9    2.658    2.666    2.658    2.666
 grid_collocate_task_list           110  9.6    2.182    2.466    2.182    2.466
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.423    2.463
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.446    2.450
 qs_energies_init_hamiltonians       11  5.9    0.001    0.003    2.322    2.324
 potential_pw2rs                    110 12.3    0.012    0.013    2.208    2.213
 mp_alltoall_d11v                  2046 13.8    1.738    2.025    1.738    2.025
 jit_kernel_multiply                 11 15.7    1.198    1.982    1.198    1.982
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.695    1.730
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.601    1.705
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.675    1.691
 cp_fm_cholesky_decompose            22 10.9    1.615    1.663    1.615    1.663
 mp_waitany                       13376 13.8    1.110    1.600    1.110    1.600
 multiply_cannon_metrocomm4       20550 15.4    0.058    0.061    0.813    1.596
 mp_sum_l                          6514 12.8    0.913    1.573    0.913    1.573
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.527    1.543
 mp_allgather_i34                  2055 14.4    0.520    1.526    0.520    1.526
 mp_irecv_dv                      62702 16.1    0.714    1.517    0.714    1.517
 acc_transpose_blocks             24660 15.4    0.105    0.109    1.489    1.512
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    1.265    1.372
 rs_pw_transfer_RS2PW_140           121 11.5    0.169    0.180    0.857    1.325
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=64.829000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=697.818182, yerr=11.960679
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             827.879424E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3521                  65574.
 MP_Allreduce         9838                    640.
 MP_Sync               100
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_comm_split          48
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.045   55.138   55.139
 qs_mol_dyn_low                       1  2.0    0.003    0.004   54.867   54.876
 qs_forces                           11  3.9    0.002    0.002   54.792   54.803
 qs_energies                         11  4.9    0.001    0.001   51.103   51.115
 scf_env_do_scf                      11  5.9    0.000    0.001   42.881   42.881
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.006   35.215   35.215
 velocity_verlet                     10  3.0    0.001    0.001   31.061   31.064
 dbcsr_multiply_generic            2055 12.4    0.104    0.105   22.431   22.539
 qs_scf_new_mos                      99  7.5    0.001    0.001   20.005   20.054
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   20.004   20.054
 ot_scf_mini                         99  9.5    0.002    0.002   18.775   18.795
 multiply_cannon                   2055 13.4    0.246    0.265   17.087   18.319
 multiply_cannon_loop              2055 14.4    0.319    0.330   15.748   16.004
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.030   12.070
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.014   12.030   12.069
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.691   10.725
 ot_mini                             99 10.5    0.001    0.001   10.124   10.139
 multiply_cannon_multrec           8220 15.4    3.203    4.477    7.412    8.340
 init_scf_loop                       11  6.9    0.000    0.000    7.619    7.619
 mp_waitall_1                    103326 16.6    5.865    7.439    5.865    7.439
 sum_up_and_integrate               110 10.3    0.079    0.080    6.568    6.581
 integrate_v_rspace                 110 11.3    0.003    0.003    6.489    6.503
 qs_rho_update_rho_low              110  7.6    0.001    0.001    6.366    6.379
 calculate_rho_elec                 110  8.6    0.115    0.116    6.365    6.379
 qs_ot_get_derivative                99 11.5    0.001    0.001    6.356    6.374
 prepare_preconditioner              11  7.9    0.000    0.000    5.943    5.948
 make_preconditioner                 11  8.9    0.000    0.000    5.943    5.948
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.533    5.610
 init_scf_run                        11  5.9    0.000    0.001    5.097    5.097
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.096    5.097
 dbcsr_mm_accdrv_process          17442 15.9    2.801    3.493    4.080    5.000
 qs_ot_get_p                        110 10.4    0.001    0.001    4.758    4.777
 make_m2s                          4110 13.4    0.038    0.040    4.140    4.430
 multiply_cannon_metrocomm3        8220 15.4    0.017    0.018    2.954    4.358
 make_images                       4110 14.4    0.634    0.691    4.011    4.303
 pw_transfer                       1331 11.6    0.066    0.071    4.149    4.160
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    4.042    4.057
 ot_diis_step                        99 11.5    0.012    0.012    3.748    3.748
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.674    3.698
 apply_single                       110 13.6    0.000    0.000    3.674    3.698
 fft_wrap_pw1pw2_140                451 13.1    0.215    0.219    3.511    3.527
 grid_integrate_task_list           110 12.3    3.362    3.514    3.362    3.514
 density_rs2pw                      110  9.6    0.004    0.004    3.109    3.484
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.468    3.471
 fft3d_ps                          1111 14.6    1.144    1.165    3.287    3.296
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.173    3.173
 multiply_cannon_sync_h2d          8220 15.4    2.920    3.025    2.920    3.025
 cp_fm_cholesky_invert               11 10.9    2.830    2.834    2.830    2.834
 hybrid_alltoall_any               4261 16.3    0.200    0.866    2.333    2.784
 make_images_data                  4110 15.4    0.038    0.043    2.373    2.784
 wfi_extrapolate                     11  7.9    0.001    0.001    2.730    2.730
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    2.681    2.692
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.666    2.667
 cp_fm_redistribute_end              48 14.0    0.678    2.641    0.682    2.642
 cp_fm_diag_elpa_base                48 14.0    1.780    2.455    1.952    2.612
 rs_pw_transfer                     902 11.9    0.010    0.010    2.147    2.549
 grid_collocate_task_list           110  9.6    2.279    2.539    2.279    2.539
 calculate_dm_sparse                110  9.5    0.001    0.001    2.452    2.495
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.467    2.468
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.269    2.270
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.119    2.133
 potential_pw2rs                    110 12.3    0.015    0.015    2.119    2.123
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.773    1.986
 mp_alltoall_d11v                  2046 13.8    1.614    1.871    1.614    1.871
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    1.844    1.859
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    1.736    1.745
 cp_fm_cholesky_decompose            22 10.9    1.672    1.686    1.672    1.686
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.674    1.679
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    1.515    1.635
 mp_allgather_i34                  2055 14.4    0.530    1.629    0.530    1.629
 dbcsr_complete_redistribute        325 12.2    0.562    0.577    1.409    1.496
 mp_waitany                        9240 13.8    1.050    1.475    1.050    1.475
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.422    1.431
 jit_kernel_multiply                  8 15.9    0.972    1.368    0.972    1.368
 qs_create_task_list                 11  7.9    0.000    0.001    1.222    1.326
 generate_qs_task_list               11  8.9    0.380    0.447    1.222    1.326
 multiply_cannon_metrocomm1        8220 15.4    0.020    0.021    0.706    1.217
 rs_pw_transfer_RS2PW_140           121 11.5    0.161    0.165    0.793    1.198
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.135    1.149
 rs_gather_matrices                 110 12.3    0.322    0.364    0.919    1.149
 multiply_cannon_metrocomm4        6165 15.4    0.018    0.019    0.483    1.107
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=55.139000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=783.090909, yerr=9.958592
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.364926E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67104.
 MP_Allreduce         9672                    819.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.038   86.494   86.495
 qs_mol_dyn_low                       1  2.0    0.003    0.004   86.191   86.200
 qs_forces                           11  3.9    0.002    0.002   86.114   86.124
 qs_energies                         11  4.9    0.001    0.002   81.942   81.952
 scf_env_do_scf                      11  5.9    0.001    0.001   71.870   71.870
 velocity_verlet                     10  3.0    0.001    0.001   55.359   55.365
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.006   43.803   43.805
 dbcsr_multiply_generic            2055 12.4    0.118    0.121   28.076   28.163
 init_scf_loop                       11  6.9    0.000    0.000   27.996   27.998
 prepare_preconditioner              11  7.9    0.000    0.000   25.973   25.979
 make_preconditioner                 11  8.9    0.000    0.000   25.973   25.979
 qs_scf_new_mos                      99  7.5    0.001    0.001   25.479   25.516
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   25.478   25.515
 make_full_inverse_cholesky          11  9.9    0.000    0.000   20.271   25.471
 ot_scf_mini                         99  9.5    0.002    0.002   23.746   23.765
 multiply_cannon                   2055 13.4    0.346    0.377   21.330   22.171
 multiply_cannon_loop              2055 14.4    0.340    0.345   19.581   19.921
 cp_fm_upper_to_full                 70 14.2   12.611   18.054   12.611   18.054
 rebuild_ks_matrix                  110  8.3    0.000    0.001   14.174   14.199
 qs_ks_build_kohn_sham_matrix       110  9.3    0.014    0.015   14.174   14.199
 ot_mini                             99 10.5    0.001    0.001   13.149   13.170
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.830   12.853
 dbcsr_complete_redistribute        325 12.2    1.016    1.042    7.235   10.427
 multiply_cannon_multrec           8220 15.4    4.351    4.558    9.520    9.606
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.237    9.421
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    5.687    8.832
 qs_ot_get_derivative                99 11.5    0.001    0.001    8.695    8.715
 mp_alltoall_i22                    605 13.7    5.318    8.511    5.318    8.511
 mp_waitall_1                     84994 16.7    7.212    7.994    7.212    7.994
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.749    7.783
 calculate_rho_elec                 110  8.6    0.227    0.227    7.749    7.783
 sum_up_and_integrate               110 10.3    0.150    0.151    7.374    7.388
 integrate_v_rspace                 110 11.3    0.004    0.004    7.224    7.238
 init_scf_run                        11  5.9    0.000    0.001    5.805    5.805
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.805    5.805
 make_m2s                          4110 13.4    0.043    0.044    5.120    5.623
 qs_ot_get_p                        110 10.4    0.001    0.001    5.493    5.517
 make_images                       4110 14.4    0.874    0.928    4.932    5.433
 pw_transfer                       1331 11.6    0.075    0.075    5.300    5.305
 dbcsr_mm_accdrv_process          11614 15.7    3.257    3.976    5.026    5.281
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.009    5.183    5.190
 cp_fm_cholesky_invert               11 10.9    5.154    5.158    5.154    5.158
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.569    5.032
 apply_single                       110 13.6    0.000    0.000    4.569    5.031
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.018    4.545    4.880
 fft_wrap_pw1pw2_140                451 13.1    0.221    0.222    4.522    4.528
 ot_diis_step                        99 11.5    0.015    0.015    4.439    4.439
 fft3d_ps                          1111 14.6    1.295    1.312    4.378    4.387
 qs_ot_p2m_diag                      48 11.0    0.150    0.155    4.008    4.015
 multiply_cannon_sync_h2d          8220 15.4    3.951    3.959    3.951    3.959
 density_rs2pw                      110  9.6    0.004    0.004    3.784    3.814
 grid_integrate_task_list           110 12.3    3.664    3.719    3.664    3.719
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.678    3.689
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.575    3.575
 hybrid_alltoall_any               4261 16.3    0.257    0.552    2.823    3.547
 make_images_data                  4110 15.4    0.042    0.045    2.784    3.388
 wfi_extrapolate                     11  7.9    0.001    0.001    3.273    3.273
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.792    3.227
 calculate_dm_sparse                110  9.5    0.001    0.001    3.091    3.113
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.992    2.996
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.991    2.992
 cp_fm_diag_elpa_base                48 14.0    2.466    2.662    2.990    2.990
 grid_collocate_task_list           110  9.6    2.630    2.657    2.630    2.657
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.515    2.523
 potential_pw2rs                    110 12.3    0.021    0.022    2.512    2.515
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.409    2.411
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.302    2.315
 rs_pw_transfer                     902 11.9    0.011    0.011    2.248    2.278
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    2.212    2.271
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.082    2.160
 mp_alltoall_d11v                  2046 13.8    1.903    1.997    1.903    1.997
 cp_fm_cholesky_decompose            22 10.9    1.964    1.979    1.964    1.979
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.948    1.951
 qs_create_task_list                 11  7.9    0.001    0.001    1.894    1.943
 generate_qs_task_list               11  8.9    0.736    0.789    1.894    1.942
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.739    1.760
 jit_kernel_multiply                 10 15.4    1.569    1.734    1.569    1.734
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=86.495000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1225.363636, yerr=62.124908
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420242647040       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528903135232       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514766E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755941440       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             627.814400E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175955383376
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4085                  56760.
 MP_Allreduce        11253                    785.
 MP_Sync               170
 MP_Alltoall          2226                2520936.
 MP_SendRecv         24320                  18752.
 MP_ISendRecv        24320                  18752.
 MP_Wait             42476
 MP_comm_split          83
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.013    0.029  202.532  202.533
 qs_mol_dyn_low                       1  2.0    0.003    0.003  202.138  202.153
 qs_forces                           11  3.9    0.003    0.003  202.049  202.053
 qs_energies                         11  4.9    0.001    0.002  196.436  196.454
 scf_env_do_scf                      11  5.9    0.001    0.001  179.793  179.797
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  159.264  159.266
 dbcsr_multiply_generic            2507 12.6    0.177    0.181  122.408  123.153
 velocity_verlet                     10  3.0    0.001    0.001  122.565  122.566
 qs_scf_new_mos                     117  7.6    0.001    0.001  118.511  118.903
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  118.510  118.902
 ot_scf_mini                        117  9.6    0.003    0.003  111.999  112.392
 multiply_cannon                   2507 13.6    0.238    0.248  100.403  102.399
 multiply_cannon_loop              2507 14.6    2.032    2.102   98.175  100.133
 ot_mini                            117 10.6    0.001    0.001   64.394   64.779
 multiply_cannon_multrec          60168 15.6   33.795   35.873   41.816   43.689
 qs_ot_get_derivative               117 11.6    0.001    0.001   39.615   39.996
 rebuild_ks_matrix                  128  8.3    0.001    0.001   34.168   34.725
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.017   34.167   34.725
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.732   31.268
 mp_waitall_1                    267128 16.5   27.282   30.491   27.282   30.491
 multiply_cannon_sync_h2d         60168 15.6   27.933   30.347   27.933   30.347
 qs_ot_get_p                        128 10.4    0.001    0.001   25.976   26.348
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.357   25.011
 apply_single                       128 13.6    0.001    0.001   24.356   25.011
 ot_diis_step                       117 11.6    0.008    0.008   24.529   24.531
 init_scf_loop                       11  6.9    0.000    0.000   20.451   20.453
 qs_ot_p2m_diag                      83 11.4    0.078    0.091   19.345   19.396
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   18.092   18.360
 multiply_cannon_metrocomm3       60168 15.6    0.118    0.124   15.008   17.827
 cp_dbcsr_syevd                      83 12.4    0.004    0.005   17.054   17.056
 prepare_preconditioner              11  7.9    0.000    0.000   15.797   15.843
 make_preconditioner                 11  8.9    0.000    0.000   15.797   15.843
 make_full_inverse_cholesky          11  9.9    0.000    0.000   15.031   15.211
 sum_up_and_integrate               128 10.3    0.089    0.107   14.559   14.575
 integrate_v_rspace                 128 11.3    0.004    0.004   14.469   14.491
 make_m2s                          5014 13.6    0.104    0.117   13.797   14.315
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   14.175   14.181
 qs_rho_update_rho_low              128  7.7    0.001    0.001   14.091   14.172
 calculate_rho_elec                 128  8.7    0.045    0.065   14.091   14.171
 make_images                       5014 14.6    0.400    0.418   13.616   14.142
 cp_fm_redistribute_end              83 14.4   11.207   14.103   11.218   14.106
 cp_fm_diag_elpa_base                83 14.4    2.845   13.734    2.876   13.850
 init_scf_run                        11  5.9    0.000    0.001   12.401   12.401
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.401   12.401
 density_rs2pw                      128  9.7    0.007    0.008    7.464   10.584
 wfi_extrapolate                     11  7.9    0.001    0.001    9.248    9.249
 rs_pw_transfer                    1046 11.9    0.016    0.018    5.998    9.184
 cp_fm_cholesky_invert               11 10.9    8.921    8.929    8.921    8.929
 pw_transfer                       1547 11.6    0.076    0.101    8.245    8.515
 calculate_dm_sparse                128  9.5    0.001    0.001    8.324    8.408
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.012    8.039    8.313
 dbcsr_mm_accdrv_process         124484 16.2    3.007    3.220    7.571    8.164
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    7.903    8.062
 mp_sum_l                          7870 13.0    6.930    8.013    6.930    8.013
 make_images_data                  5014 15.6    0.066    0.073    6.738    7.771
 grid_integrate_task_list           128 12.3    7.040    7.607    7.040    7.607
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    7.502    7.557
 multiply_cannon_metrocomm1       60168 15.6    0.089    0.096    5.680    7.552
 hybrid_alltoall_any               5200 16.5    0.290    2.249    5.898    7.273
 fft_wrap_pw1pw2_140                523 13.2    0.444    0.515    6.924    7.245
 fft3d_ps                          1291 14.7    2.102    2.888    6.828    7.006
 cp_dbcsr_sm_fm_multiply             37  9.5    0.003    0.003    6.654    6.664
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.772    5.966
 mp_alltoall_d11v                  2415 14.1    4.296    5.856    4.296    5.856
 mp_waitany                       16020 13.9    2.640    5.831    2.640    5.831
 grid_collocate_task_list           128  9.7    4.552    5.782    4.552    5.782
 rs_pw_transfer_RS2PW_140           139 11.5    0.280    0.295    2.116    5.259
 potential_pw2rs                    128 12.3    0.009    0.011    4.897    4.927
 cp_fm_cholesky_decompose            22 10.9    4.687    4.703    4.687    4.703
 mp_sum_d                          4462 12.2    3.426    4.144    3.426    4.144
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=202.533000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=594.272727, yerr=6.743224
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430460020736       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1958505086976       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986244964352       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992000282624       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753956716544       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613089636352       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239146475520       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239146475520       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911124992000       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.228663E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.199914E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806316384       0.0%      0.0%    100.0%
 number of processed stacks               6022464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.2
 marketing flops                   145.647559E+12
 -------------------------------------------------------------------------------
 # multiplications                           2527
 max memory usage/rank             832.897024E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2425920
 MPI messages size (bytes):
  total size                         4.132350E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.703416E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               71436               2336489472
     32768 < size <=   131072              728832              55956209664
    131072 < size <=  4194304             1386864            1409906900992
   4194304 < size <= 16777216              155760            1473826772352
  16777216 < size                           68112            1190343475200
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4107                  56904.
 MP_Allreduce        11307                    944.
 MP_Sync               170
 MP_Alltoall          1983                4616149.
 MP_SendRecv         12126                  47072.
 MP_ISendRecv        12126                  47072.
 MP_Wait             26114
 MP_comm_split          83
 MP_ISend            11836                 212447.
 MP_IRecv            11836                 212447.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.043    0.073  191.914  191.915
 qs_mol_dyn_low                       1  2.0    0.003    0.004  191.456  191.469
 qs_forces                           11  3.9    0.003    0.007  191.374  191.376
 qs_energies                         11  4.9    0.002    0.010  184.550  184.560
 scf_env_do_scf                      11  5.9    0.001    0.002  168.232  168.242
 scf_env_do_scf_inner_loop          118  6.6    0.005    0.026  135.378  135.381
 velocity_verlet                     10  3.0    0.001    0.001  121.067  121.069
 dbcsr_multiply_generic            2527 12.6    0.186    0.193   97.849   99.092
 qs_scf_new_mos                     118  7.6    0.001    0.001   94.623   95.190
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   94.622   95.189
 ot_scf_mini                        118  9.6    0.004    0.005   89.770   90.380
 multiply_cannon                   2527 13.6    0.481    0.536   77.678   82.194
 multiply_cannon_loop              2527 14.6    1.261    1.304   74.424   77.128
 ot_mini                            118 10.6    0.001    0.001   50.190   50.755
 mp_waitall_1                    216438 16.6   25.084   38.602   25.084   38.602
 multiply_cannon_multrec          30324 15.6   21.939   26.164   31.566   36.182
 rebuild_ks_matrix                  129  8.3    0.001    0.001   33.752   34.425
 qs_ks_build_kohn_sham_matrix       129  9.3    0.017    0.020   33.751   34.424
 init_scf_loop                       11  6.9    0.001    0.004   32.763   32.764
 qs_ks_update_qs_env                129  7.6    0.001    0.001   30.408   31.018
 qs_ot_get_derivative               118 11.6    0.001    0.002   27.898   28.507
 prepare_preconditioner              11  7.9    0.000    0.000   28.329   28.430
 make_preconditioner                 11  8.9    0.000    0.002   28.329   28.430
 multiply_cannon_metrocomm3       30324 15.6    0.097    0.103   16.159   28.299
 make_full_inverse_cholesky          11  9.9    0.000    0.000   27.028   27.607
 apply_preconditioner_dbcsr         129 12.6    0.000    0.000   22.351   23.449
 apply_single                       129 13.6    0.001    0.001   22.350   23.448
 ot_diis_step                       118 11.6    0.014    0.015   22.116   22.118
 qs_ot_get_p                        129 10.4    0.001    0.002   21.256   21.982
 multiply_cannon_sync_h2d         30324 15.6   19.172   21.859   19.172   21.859
 cp_fm_cholesky_invert               11 10.9   16.625   16.637   16.625   16.637
 qs_ot_p2m_diag                      83 11.4    0.187    0.215   16.448   16.485
 make_m2s                          5054 13.6    0.089    0.095   14.396   15.947
 make_images                       5054 14.6    1.154    1.346   14.185   15.738
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   15.311   15.313
 sum_up_and_integrate               129 10.3    0.118    0.133   15.011   15.044
 integrate_v_rspace                 129 11.3    0.003    0.005   14.893   14.932
 qs_rho_update_rho_low              129  7.7    0.001    0.001   14.373   14.407
 calculate_rho_elec                 129  8.7    0.089    0.106   14.373   14.406
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   12.189   12.202
 cp_fm_redistribute_end              83 14.4    7.119   12.130    7.128   12.131
 cp_fm_diag_elpa_base                83 14.4    4.765   11.617    4.980   11.982
 init_scf_run                        11  5.9    0.000    0.001   11.490   11.491
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   11.489   11.491
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   10.909   11.338
 multiply_cannon_metrocomm4       27797 15.6    0.098    0.113    3.828   11.091
 mp_irecv_dv                      70031 16.3    3.632   10.699    3.632   10.699
 make_images_data                  5054 15.6    0.064    0.073    8.815   10.632
 density_rs2pw                      129  9.7    0.006    0.007    7.664   10.068
 hybrid_alltoall_any               5240 16.5    0.344    1.504    7.582   10.032
 dbcsr_mm_accdrv_process          62734 16.2    4.557    5.309    9.086    9.682
 pw_transfer                       1559 11.6    0.087    0.108    9.197    9.278
 fft_wrap_pw1pw2                   1301 12.7    0.011    0.012    8.970    9.059
 wfi_extrapolate                     11  7.9    0.001    0.001    8.373    8.373
 rs_pw_transfer                    1054 12.0    0.014    0.016    5.843    8.263
 fft_wrap_pw1pw2_140                527 13.2    0.477    0.532    7.916    8.020
 grid_integrate_task_list           129 12.3    7.234    7.679    7.234    7.679
 fft3d_ps                          1301 14.7    2.773    2.956    7.319    7.355
 qs_ot_get_derivative_taylor         41 13.0    0.001    0.001    6.317    7.076
 cp_fm_cholesky_decompose            22 10.9    6.842    6.924    6.842    6.924
 calculate_dm_sparse                129  9.5    0.001    0.001    6.502    6.667
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.088    6.097
 mp_sum_l                          7930 13.1    3.994    5.968    3.994    5.968
 grid_collocate_task_list           129  9.7    4.743    5.929    4.743    5.929
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    5.393    5.463
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.220    5.409
 potential_pw2rs                    129 12.3    0.014    0.016    5.216    5.239
 mp_allgather_i34                  2527 14.6    1.835    5.098    1.835    5.098
 mp_waitany                       11836 13.9    2.512    4.946    2.512    4.946
 mp_alltoall_d11v                  2423 14.1    4.145    4.547    4.145    4.547
 rs_pw_transfer_RS2PW_140           140 11.5    0.353    0.381    2.099    4.505
 mp_sum_d                          4491 12.2    2.662    4.043    2.662    4.043
 dbcsr_complete_redistribute        395 12.7    0.789    0.869    3.121    3.958
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=191.915000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=792.090909, yerr=6.331086
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.928533E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               3984192       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1695.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             931.459072E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1042912
 MPI messages size (bytes):
  total size                         2.716210E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.604448E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              281856              36943429632
    131072 < size <=  4194304              660064             996105256960
   4194304 < size <= 16777216               65632             931530938576
  16777216 < size                           28672             751619276800
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4075                  57335.
 MP_Allreduce        11226                    986.
 MP_Sync               170
 MP_Alltoall          1712                9388896.
 MP_SendRecv          7936                  75008.
 MP_ISendRecv         7936                  75008.
 MP_Wait             21820
 MP_comm_split          83
 MP_ISend            11748                 275205.
 MP_IRecv            11748                 275205.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.040    0.061  178.111  178.111
 qs_mol_dyn_low                       1  2.0    0.003    0.004  177.500  177.514
 qs_forces                           11  3.9    0.003    0.007  177.405  177.410
 qs_energies                         11  4.9    0.001    0.004  170.751  170.760
 scf_env_do_scf                      11  5.9    0.001    0.003  155.238  155.239
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.009  120.020  120.020
 velocity_verlet                     10  3.0    0.001    0.001  114.120  114.122
 dbcsr_multiply_generic            2507 12.6    0.181    0.185   81.920   83.188
 qs_scf_new_mos                     117  7.6    0.001    0.001   81.181   81.525
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   81.181   81.524
 ot_scf_mini                        117  9.6    0.004    0.004   77.017   77.371
 multiply_cannon                   2507 13.6    0.500    0.518   61.754   66.599
 multiply_cannon_loop              2507 14.6    0.853    0.876   58.687   61.344
 ot_mini                            117 10.6    0.001    0.001   42.629   42.962
 init_scf_loop                       11  6.9    0.000    0.002   35.112   35.113
 mp_waitall_1                    170520 16.6   25.700   34.717   25.700   34.717
 rebuild_ks_matrix                  128  8.3    0.001    0.001   31.249   31.690
 qs_ks_build_kohn_sham_matrix       128  9.3    0.016    0.019   31.248   31.689
 prepare_preconditioner              11  7.9    0.000    0.000   30.999   31.045
 make_preconditioner                 11  8.9    0.000    0.001   30.999   31.045
 make_full_inverse_cholesky          11  9.9    0.000    0.000   28.606   30.073
 qs_ks_update_qs_env                128  7.6    0.001    0.001   28.172   28.569
 multiply_cannon_multrec          20056 15.6   13.366   16.581   21.994   25.256
 multiply_cannon_metrocomm3       20056 15.6    0.059    0.064   15.587   24.597
 qs_ot_get_derivative               117 11.6    0.002    0.002   22.717   23.071
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   20.008   21.028
 apply_single                       128 13.6    0.001    0.001   20.008   21.027
 qs_ot_get_p                        128 10.4    0.001    0.001   19.595   19.996
 ot_diis_step                       117 11.6    0.018    0.019   19.808   19.808
 make_m2s                          5014 13.6    0.081    0.085   15.212   16.158
 make_images                       5014 14.6    1.165    1.248   14.978   15.919
 multiply_cannon_sync_h2d         20056 15.6   14.203   15.876   14.203   15.876
 qs_ot_p2m_diag                      83 11.4    0.265    0.272   15.386   15.393
 sum_up_and_integrate               128 10.3    0.135    0.145   14.870   14.901
 integrate_v_rspace                 128 11.3    0.004    0.005   14.735   14.761
 qs_rho_update_rho_low              128  7.7    0.001    0.001   14.538   14.565
 calculate_rho_elec                 128  8.7    0.132    0.147   14.538   14.564
 cp_fm_cholesky_invert               11 10.9   14.403   14.412   14.403   14.412
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   14.369   14.370
 cp_fm_diag_elpa                     83 13.4    0.000    0.001   11.178   11.179
 cp_fm_redistribute_end              83 14.4    4.228   11.119    4.244   11.121
 cp_fm_diag_elpa_base                83 14.4    6.447   10.529    6.857   11.006
 make_images_data                  5014 15.6    0.060    0.067    9.533   10.930
 init_scf_run                        11  5.9    0.000    0.001   10.399   10.400
 scf_env_initial_rho_setup           11  6.9    0.001    0.003   10.399   10.399
 hybrid_alltoall_any               5200 16.5    0.435    2.003    8.252    9.837
 multiply_cannon_metrocomm4       17549 15.6    0.062    0.073    3.533    9.486
 pw_transfer                       1547 11.6    0.086    0.108    9.260    9.379
 density_rs2pw                      128  9.7    0.006    0.006    7.395    9.372
 mp_irecv_dv                      50230 16.2    3.411    9.240    3.411    9.240
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    8.934    9.181
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.012    9.035    9.161
 dbcsr_mm_accdrv_process          41502 16.2    4.319    4.894    8.086    8.265
 fft_wrap_pw1pw2_140                523 13.2    0.477    0.528    7.981    8.113
 grid_integrate_task_list           128 12.3    7.264    7.775    7.264    7.775
 cp_fm_upper_to_full                105 14.5    5.842    7.552    5.842    7.552
 wfi_extrapolate                     11  7.9    0.001    0.001    7.468    7.469
 fft3d_ps                          1291 14.7    2.664    2.894    7.302    7.382
 cp_fm_cholesky_decompose            22 10.9    7.303    7.339    7.303    7.339
 rs_pw_transfer                    1046 11.9    0.014    0.015    5.294    7.269
 dbcsr_complete_redistribute        395 12.7    1.167    1.194    4.656    6.476
 calculate_dm_sparse                128  9.5    0.001    0.001    5.757    5.859
 grid_collocate_task_list           128  9.7    4.910    5.737    4.910    5.737
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.427    5.437
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.460    5.279
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.522    5.198
 mp_alltoall_d11v                  2415 14.1    4.432    5.165    4.432    5.165
 potential_pw2rs                    128 12.3    0.020    0.023    4.936    4.955
 mp_allgather_i34                  2507 14.6    1.608    4.798    1.608    4.798
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.541    4.690
 mp_sum_l                          7870 13.0    3.148    4.675    3.148    4.675
 mp_waitany                       11748 13.9    2.421    4.476    2.421    4.476
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    2.373    4.113
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    4.031    4.069
 rs_pw_transfer_RS2PW_140           139 11.5    0.331    0.350    1.979    3.950
 mp_alltoall_i22                    716 14.1    1.964    3.875    1.964    3.875
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    3.841    3.843
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.588    3.632
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=178.111000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=883.727273, yerr=9.808076
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022121472       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444702699520       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796573E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.320339E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705499744       0.0%      0.0%    100.0%
 number of processed stacks               5927808       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1131.2
 marketing flops                   143.508480E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               1.130082E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1133160
 MPI messages size (bytes):
  total size                         2.008142E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.772161E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              315952              35695099904
    131072 < size <=  4194304              709496             778939400192
   4194304 < size <= 16777216               69840             660837692480
  16777216 < size                           30480             532676608000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4085                  57191.
 MP_Allreduce        11251                   1068.
 MP_Sync               168
 MP_Alltoall          1700               12496376.
 MP_SendRecv          5842                  75008.
 MP_ISendRecv         5842                  75008.
 MP_Wait             22272
 MP_comm_split          82
 MP_ISend            14840                 244848.
 MP_IRecv            14840                 244848.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.099    0.193  188.116  188.118
 qs_mol_dyn_low                       1  2.0    0.003    0.005  187.466  187.478
 qs_forces                           11  3.9    0.003    0.008  187.356  187.361
 qs_energies                         11  4.9    0.002    0.006  180.141  180.151
 scf_env_do_scf                      11  5.9    0.001    0.003  163.438  163.445
 velocity_verlet                     10  3.0    0.001    0.001  124.109  124.112
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  116.954  116.956
 qs_scf_new_mos                     116  7.6    0.001    0.001   79.262   79.561
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   79.262   79.560
 dbcsr_multiply_generic            2485 12.5    0.187    0.192   78.447   79.156
 ot_scf_mini                        116  9.6    0.003    0.005   74.819   75.097
 multiply_cannon                   2485 13.5    0.549    0.591   54.075   56.917
 multiply_cannon_loop              2485 14.5    1.166    1.197   50.514   52.089
 init_scf_loop                       11  6.9    0.001    0.006   46.359   46.360
 prepare_preconditioner              11  7.9    0.000    0.001   42.216   42.255
 make_preconditioner                 11  8.9    0.000    0.002   42.216   42.255
 ot_mini                            116 10.6    0.001    0.001   41.866   42.141
 make_full_inverse_cholesky          11  9.9    0.000    0.000   35.859   40.773
 multiply_cannon_multrec          29820 15.5   13.968   19.223   25.780   30.766
 rebuild_ks_matrix                  127  8.3    0.001    0.001   29.997   30.246
 qs_ks_build_kohn_sham_matrix       127  9.3    0.017    0.024   29.996   30.245
 mp_waitall_1                    146592 16.7   17.281   27.430   17.281   27.430
 qs_ks_update_qs_env                127  7.6    0.001    0.001   26.998   27.209
 qs_ot_get_derivative               116 11.6    0.002    0.002   22.436   22.716
 make_m2s                          4970 13.5    0.094    0.098   20.122   21.169
 make_images                       4970 14.5    1.929    2.234   19.827   20.878
 qs_ot_get_p                        127 10.4    0.001    0.001   19.303   19.600
 apply_preconditioner_dbcsr         127 12.6    0.000    0.001   18.896   19.415
 apply_single                       127 13.6    0.001    0.001   18.896   19.415
 ot_diis_step                       116 11.6    0.017    0.018   19.304   19.306
 cp_fm_cholesky_invert               11 10.9   16.250   16.259   16.250   16.259
 cp_fm_upper_to_full                104 14.7   10.894   16.121   10.894   16.121
 qs_ot_p2m_diag                      82 11.4    0.338    0.385   15.240   15.290
 sum_up_and_integrate               127 10.3    0.140    0.153   14.848   14.870
 qs_rho_update_rho_low              127  7.7    0.001    0.002   14.762   14.791
 calculate_rho_elec                 127  8.7    0.174    0.189   14.761   14.790
 integrate_v_rspace                 127 11.3    0.004    0.006   14.708   14.734
 multiply_cannon_metrocomm3       29820 15.5    0.046    0.048    6.484   14.693
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   13.977   13.979
 dbcsr_complete_redistribute        393 12.7    1.510    1.635    9.055   12.766
 make_images_data                  4970 15.5    0.063    0.067   10.866   12.692
 multiply_cannon_sync_h2d         29820 15.5   11.590   12.336   11.590   12.336
 dbcsr_mm_accdrv_process          61748 16.2    7.195    7.977   11.388   11.877
 hybrid_alltoall_any               5155 16.4    0.521    2.172    9.785   11.700
 copy_fm_to_dbcsr                   208 11.6    0.002    0.002    7.666   11.341
 cp_fm_diag_elpa                     82 13.4    0.000    0.001   10.755   10.757
 init_scf_run                        11  5.9    0.000    0.001   10.750   10.752
 scf_env_initial_rho_setup           11  6.9    0.001    0.002   10.750   10.752
 cp_fm_redistribute_end              82 14.4    1.858   10.671    1.873   10.676
 cp_fm_diag_elpa_base                82 14.4    8.199   10.071    8.768   10.542
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    6.334    9.953
 pw_transfer                       1535 11.6    0.087    0.104    9.537    9.621
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    9.239    9.447
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    9.311    9.401
 mp_alltoall_i22                    712 14.1    5.509    9.200    5.509    9.200
 density_rs2pw                      127  9.7    0.006    0.006    7.226    8.781
 fft_wrap_pw1pw2_140                519 13.2    0.478    0.495    8.253    8.348
 grid_integrate_task_list           127 12.3    7.464    7.831    7.464    7.831
 cp_fm_cholesky_decompose            22 10.9    7.569    7.711    7.569    7.711
 wfi_extrapolate                     11  7.9    0.001    0.001    7.669    7.669
 fft3d_ps                          1281 14.7    2.751    2.839    7.551    7.620
 multiply_cannon_metrocomm4       24850 15.5    0.076    0.086    2.783    7.233
 mp_irecv_dv                      75445 16.2    2.640    6.961    2.640    6.961
 rs_pw_transfer                    1038 11.9    0.013    0.014    4.750    6.365
 calculate_dm_sparse                127  9.5    0.001    0.001    6.102    6.180
 mp_alltoall_d11v                  2401 14.1    5.103    5.931    5.103    5.931
 grid_collocate_task_list           127  9.7    5.028    5.772    5.028    5.772
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.390    5.437
 potential_pw2rs                    127 12.3    0.022    0.022    4.685    4.697
 qs_energies_init_hamiltonians       11  5.9    0.001    0.004    4.491    4.492
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.327    4.438
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.332    4.412
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    4.159    4.212
 mp_waitany                       14840 13.9    2.262    3.851    2.262    3.851
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=188.118000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1071.545455, yerr=9.277183
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410024443904       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444712984576       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019188129792       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019188129792       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796586E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.820059E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705502176       0.0%      0.0%    100.0%
 number of processed stacks               1944496       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3448.5
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               1.531802E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  238560
 MPI messages size (bytes):
  total size                         1.321104E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.537828E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              112800              59139686400
   4194304 < size <= 16777216              104112             545846722560
  16777216 < size                           20064             716108700816
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8852                     52.
 MP_Alltoall          9584                 804353.
 MP_ISend            39716                2104723.
 MP_IRecv            39716                2103824.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4003                  58193.
 MP_Allreduce        11005                   1175.
 MP_Sync                86
 MP_Alltoall          1700               18828148.
 MP_SendRecv          3810                 122880.
 MP_ISendRecv         3810                 122880.
 MP_Wait             16000
 MP_ISend            10600                 423612.
 MP_IRecv            10600                 423612.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.037  173.984  173.984
 qs_mol_dyn_low                       1  2.0    0.003    0.010  173.588  173.602
 qs_forces                           11  3.9    0.010    0.013  173.499  173.503
 qs_energies                         11  4.9    0.001    0.002  165.945  165.954
 scf_env_do_scf                      11  5.9    0.001    0.001  148.866  148.868
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  113.232  113.234
 velocity_verlet                     10  3.0    0.001    0.001  113.204  113.208
 qs_scf_new_mos                     116  7.6    0.001    0.001   74.459   74.579
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   74.458   74.579
 dbcsr_multiply_generic            2485 12.5    0.180    0.187   73.629   74.154
 ot_scf_mini                        116  9.6    0.003    0.004   70.079   70.216
 multiply_cannon                   2485 13.5    0.576    0.615   54.093   57.913
 multiply_cannon_loop              2485 14.5    0.439    0.449   49.755   50.811
 ot_mini                            116 10.6    0.001    0.001   39.238   39.383
 init_scf_loop                       11  6.9    0.000    0.000   35.480   35.484
 mp_waitall_1                    124680 16.7   26.204   34.208   26.204   34.208
 prepare_preconditioner              11  7.9    0.000    0.000   31.530   31.571
 make_preconditioner                 11  8.9    0.000    0.000   31.530   31.571
 rebuild_ks_matrix                  127  8.3    0.001    0.001   29.979   30.121
 qs_ks_build_kohn_sham_matrix       127  9.3    0.018    0.018   29.978   30.121
 make_full_inverse_cholesky          11  9.9    0.000    0.000   29.458   29.719
 qs_ks_update_qs_env                127  7.6    0.001    0.001   27.252   27.385
 multiply_cannon_multrec           9940 15.5   10.305   14.393   17.534   22.013
 multiply_cannon_metrocomm3        9940 15.5    0.023    0.024   12.743   20.326
 qs_ot_get_derivative               116 11.6    0.001    0.002   19.664   19.803
 ot_diis_step                       116 11.6    0.019    0.020   19.504   19.505
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   19.084   19.398
 apply_single                       127 13.6    0.001    0.001   19.084   19.398
 qs_ot_get_p                        127 10.4    0.001    0.001   18.017   18.142
 cp_fm_cholesky_invert               11 10.9   18.069   18.075   18.069   18.075
 make_m2s                          4970 13.5    0.066    0.070   15.902   17.985
 make_images                       4970 14.5    2.234    2.725   15.598   17.674
 qs_rho_update_rho_low              127  7.7    0.001    0.001   15.436   15.467
 calculate_rho_elec                 127  8.7    0.257    0.266   15.436   15.467
 sum_up_and_integrate               127 10.3    0.179    0.189   15.078   15.130
 integrate_v_rspace                 127 11.3    0.004    0.006   14.899   14.959
 qs_ot_p2m_diag                      82 11.4    0.489    0.494   14.255   14.268
 cp_dbcsr_syevd                      82 12.4    0.005    0.005   13.130   13.131
 multiply_cannon_sync_h2d          9940 15.5   11.418   12.389   11.418   12.389
 make_images_data                  4970 15.5    0.050    0.059    9.725   12.117
 hybrid_alltoall_any               5155 16.4    0.837    3.789    9.564   11.532
 init_scf_run                        11  5.9    0.000    0.001   10.348   10.348
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.348   10.348
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   10.105   10.117
 cp_fm_diag_elpa_base                82 14.4    9.869    9.947   10.099   10.112
 pw_transfer                       1535 11.6    0.085    0.093    9.887    9.926
 fft_wrap_pw1pw2                   1281 12.7    0.010    0.011    9.666    9.711
 fft_wrap_pw1pw2_140                519 13.2    0.492    0.513    8.519    8.566
 grid_integrate_task_list           127 12.3    7.696    8.140    7.696    8.140
 multiply_cannon_metrocomm1        9940 15.5    0.028    0.028    4.478    8.078
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    7.909    8.009
 cp_fm_cholesky_decompose            22 10.9    7.903    7.997    7.903    7.997
 density_rs2pw                      127  9.7    0.005    0.006    7.083    7.957
 fft3d_ps                          1281 14.7    2.688    2.757    7.842    7.864
 dbcsr_mm_accdrv_process          20590 16.1    2.683    3.386    6.868    7.508
 wfi_extrapolate                     11  7.9    0.001    0.001    7.405    7.405
 mp_allgather_i34                  2485 14.5    2.579    6.562    2.579    6.562
 calculate_dm_sparse                127  9.5    0.001    0.001    6.026    6.139
 grid_collocate_task_list           127  9.7    5.351    6.011    5.351    6.011
 mp_alltoall_d11v                  2401 14.1    5.033    5.889    5.033    5.889
 dbcsr_complete_redistribute        393 12.7    2.100    2.186    5.129    5.487
 multiply_cannon_metrocomm4        7455 15.5    0.024    0.028    1.862    5.329
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    5.271    5.273
 mp_irecv_dv                      28618 15.9    1.825    5.244    1.825    5.244
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.172    5.179
 rs_pw_transfer                    1038 11.9    0.012    0.013    4.131    5.045
 potential_pw2rs                    127 12.3    0.026    0.027    4.589    4.602
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.079    4.117
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.566    3.871
 qs_ot_get_orbitals                 116 10.6    0.001    0.001    3.700    3.742
 copy_dbcsr_to_fm                   185 11.7    0.004    0.004    3.551    3.638
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.623    3.637
 copy_fm_to_dbcsr                   208 11.6    0.002    0.002    3.319    3.618
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.448    3.487
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=173.984000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1434.272727, yerr=53.659009
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1410022950912       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1924145348608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1957871443968       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1963544850432       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2714615709696       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4377645416448       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5350455508992       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5395653328896       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6594687401984       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11444706349056       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15019187724288       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15019187724288       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19624853225472       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        92.796577E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.606413E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6705500544       0.0%      0.0%    100.0%
 number of processed stacks               1947808       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3442.6
 marketing flops                   143.507742E+12
 -------------------------------------------------------------------------------
 # multiplications                           2485
 max memory usage/rank               2.944807E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   99400
 MPI messages size (bytes):
  total size                         1.127422E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.342272E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               44768              34745614336
   4194304 < size <= 16777216               43984             376564613120
  16777216 < size                           10032             716108613552
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3949                  59908.
 MP_Allreduce        10870                   1518.
 MP_Sync                86
 MP_Alltoall          1700               36954374.
 MP_SendRecv          1778                 218624.
 MP_ISendRecv         1778                 218624.
 MP_Wait              9728
 MP_ISend             6360                1080477.
 MP_IRecv             6360                1080477.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.056  297.269  297.269
 qs_mol_dyn_low                       1  2.0    0.003    0.004  296.738  296.748
 qs_forces                           11  3.9    0.003    0.003  296.647  296.649
 qs_energies                         11  4.9    0.001    0.002  287.310  287.320
 scf_env_do_scf                      11  5.9    0.001    0.001  265.211  265.222
 velocity_verlet                     10  3.0    0.001    0.001  214.083  214.091
 scf_env_do_scf_inner_loop          116  6.6    0.003    0.008  139.396  139.398
 init_scf_loop                       11  6.9    0.000    0.000  125.562  125.565
 prepare_preconditioner              11  7.9    0.000    0.000  120.501  120.524
 make_preconditioner                 11  8.9    0.000    0.000  120.501  120.524
 make_full_inverse_cholesky          11  9.9    0.000    0.000   95.810  117.698
 qs_scf_new_mos                     116  7.6    0.001    0.001   89.033   89.179
 qs_scf_loop_do_ot                  116  8.6    0.001    0.001   89.033   89.179
 ot_scf_mini                        116  9.6    0.004    0.004   84.354   84.397
 dbcsr_multiply_generic            2485 12.5    0.210    0.217   81.733   82.189
 cp_fm_upper_to_full                104 14.8   53.516   77.256   53.516   77.256
 multiply_cannon                   2485 13.5    0.704    0.761   57.632   58.194
 multiply_cannon_loop              2485 14.5    0.464    0.480   54.010   55.159
 ot_mini                            116 10.6    0.001    0.001   43.788   43.829
 dbcsr_complete_redistribute        393 12.7    3.982    4.011   30.320   43.775
 copy_fm_to_dbcsr                   208 11.6    0.002    0.002   26.919   40.446
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000   24.644   38.088
 rebuild_ks_matrix                  127  8.3    0.001    0.001   38.003   38.035
 qs_ks_build_kohn_sham_matrix       127  9.3    0.018    0.018   38.002   38.034
 mp_alltoall_i22                    712 14.1   22.452   36.139   22.452   36.139
 qs_ks_update_qs_env                127  7.6    0.001    0.001   35.048   35.084
 cp_fm_cholesky_invert               11 10.9   33.284   33.290   33.284   33.290
 mp_waitall_1                    102768 16.8   27.460   31.364   27.460   31.364
 qs_ot_get_p                        127 10.4    0.001    0.001   25.397   25.447
 qs_ot_get_derivative               116 11.6    0.002    0.002   23.699   23.742
 qs_ot_p2m_diag                      82 11.4    0.867    0.873   21.502   21.530
 make_m2s                          4970 13.5    0.076    0.078   19.571   20.545
 qs_rho_update_rho_low              127  7.7    0.001    0.001   20.422   20.431
 calculate_rho_elec                 127  8.7    0.479    0.480   20.421   20.430
 make_images                       4970 14.5    3.721    3.882   19.095   20.071
 ot_diis_step                       116 11.6    0.022    0.022   20.053   20.053
 cp_dbcsr_syevd                      82 12.4    0.005    0.006   19.812   19.813
 sum_up_and_integrate               127 10.3    0.319    0.322   19.594   19.672
 apply_preconditioner_dbcsr         127 12.6    0.000    0.000   19.333   19.523
 apply_single                       127 13.6    0.001    0.001   19.333   19.523
 integrate_v_rspace                 127 11.3    0.004    0.004   19.275   19.353
 multiply_cannon_metrocomm3        9940 15.5    0.023    0.024   18.256   19.182
 multiply_cannon_multrec           9940 15.5   10.376   12.154   17.712   17.772
 cp_fm_diag_elpa                     82 13.4    0.000    0.000   16.646   16.647
 cp_fm_diag_elpa_base                82 14.4   12.282   13.934   16.642   16.643
 multiply_cannon_sync_h2d          9940 15.5   15.535   15.556   15.535   15.556
 make_images_data                  4970 15.5    0.059    0.065   10.447   12.234
 init_scf_run                        11  5.9    0.000    0.001   12.160   12.161
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.160   12.161
 hybrid_alltoall_any               5155 16.4    1.291    3.012   10.520   12.083
 pw_transfer                       1535 11.6    0.092    0.093   12.053   12.057
 fft_wrap_pw1pw2                   1281 12.7    0.011    0.011   11.820   11.823
 fft_wrap_pw1pw2_140                519 13.2    0.537    0.540   10.497   10.515
 fft3d_ps                          1281 14.7    2.718    2.729    9.868    9.882
 qs_ot_get_derivative_diag           76 12.4    0.002    0.002    9.195    9.234
 wfi_extrapolate                     11  7.9    0.001    0.001    9.086    9.086
 dbcsr_mm_accdrv_process          20590 16.0    3.728    5.522    7.099    8.917
 mp_alltoall_d11v                  2401 14.1    8.061    8.821    8.061    8.821
 cp_fm_cholesky_decompose            22 10.9    8.779    8.797    8.779    8.797
 grid_integrate_task_list           127 12.3    8.521    8.684    8.521    8.684
 density_rs2pw                      127  9.7    0.005    0.005    8.159    8.277
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    7.968    7.968
 calculate_dm_sparse                127  9.5    0.001    0.001    6.514    6.629
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.314    6.381
 grid_collocate_task_list           127  9.7    6.307    6.338    6.307    6.338
 rs_scatter_matrices                138  9.7    3.606    4.560    5.923    6.152
 copy_dbcsr_to_fm                   185 11.7    0.004    0.005    6.042    6.141
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=297.269000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2627.818182, yerr=166.054108
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.261609E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255669.
 MP_Allreduce         3059                   6274.
 MP_Sync                 4
 MP_Alltoall            54                6805335.
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.016    0.034   85.279   85.280
 qs_energies                          1  2.0    0.000    0.000   84.858   84.872
 ls_scf                               1  3.0    0.000    0.000   83.949   83.962
 dbcsr_multiply_generic             111  6.7    0.014    0.015   72.913   73.109
 multiply_cannon                    111  7.7    0.017    0.021   56.163   57.471
 multiply_cannon_loop               111  8.7    0.200    0.218   52.658   54.258
 ls_scf_main                          1  4.0    0.000    0.000   52.393   52.397
 density_matrix_trs4                  2  5.0    0.002    0.003   46.801   46.881
 ls_scf_init_scf                      1  4.0    0.000    0.000   28.505   28.512
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   27.353   27.406
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   25.219   25.250
 mp_waitall_1                     11031 10.9   22.505   25.136   22.505   25.136
 multiply_cannon_multrec           2664  9.7    8.207    8.898   15.526   17.272
 multiply_cannon_sync_h2d          2664  9.7   13.817   16.199   13.817   16.199
 make_m2s                           222  7.7    0.008    0.011   13.098   13.587
 make_images                        222  8.7    0.099    0.110   13.076   13.566
 multiply_cannon_metrocomm1        2664  9.7    0.009    0.011    9.685   12.080
 make_images_data                   222  9.7    0.004    0.005    7.649    8.215
 hybrid_alltoall_any                227 10.6    0.215    1.823    6.550    8.055
 dbcsr_mm_accdrv_process           4760 10.4    0.507    0.638    6.935    7.965
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.011    5.502    7.834
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.229    7.161    6.229    7.161
 calculate_norms                   4752  9.8    5.511    6.240    5.511    6.240
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.075    5.211
 mp_sum_l                           807  5.4    3.110    4.639    3.110    4.639
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.339    3.485
 multiply_cannon_metrocomm4        2442  9.7    0.012    0.014    2.058    3.476
 mp_irecv_dv                       6231 10.9    2.041    3.444    2.041    3.444
 make_images_sizes                  222  9.7    0.000    0.000    0.729    3.424
 mp_alltoall_i44                    222 10.7    0.729    3.424    0.729    3.424
 arnoldi_extremal                     4  6.8    0.000    0.000    3.234    3.261
 arnoldi_normal_ev                    4  7.8    0.001    0.003    3.234    3.261
 build_subspace                      16  8.4    0.009    0.012    3.138    3.140
 ls_scf_post                          1  4.0    0.000    0.000    3.051    3.064
 ls_scf_store_result                  1  5.0    0.000    0.000    2.864    2.911
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.367    2.847
 dbcsr_merge_single_wm              555 10.7    0.481    0.610    2.359    2.838
 make_images_pack                   222  9.7    2.207    2.621    2.208    2.623
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.013    2.320    2.557
 dbcsr_sort_data                    658 11.4    2.138    2.549    2.138    2.549
 dbcsr_matrix_vector_mult_local     304 10.0    2.069    2.457    2.071    2.459
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.325    2.435
 buffer_matrices_ensure_size        222  8.7    1.815    2.214    1.815    2.214
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.828    1.835
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.818    1.825
 qs_ks_build_kohn_sham_matrix         3  8.3    0.004    0.011    1.818    1.825
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=85.280000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1142.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.104234E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  10339.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.041   90.630   90.632
 qs_energies                          1  2.0    0.000    0.000   90.200   90.204
 ls_scf                               1  3.0    0.000    0.000   88.880   88.883
 dbcsr_multiply_generic             111  6.7    0.015    0.015   74.884   75.213
 multiply_cannon                    111  7.7    0.029    0.043   53.255   56.603
 ls_scf_main                          1  4.0    0.000    0.000   54.745   54.751
 multiply_cannon_loop               111  8.7    0.116    0.122   49.987   52.536
 density_matrix_trs4                  2  5.0    0.002    0.003   49.050   49.242
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.611   30.612
 mp_waitall_1                      9105 10.9   21.040   30.009   21.040   30.009
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.389   29.513
 multiply_cannon_multrec           1332  9.7   13.242   17.003   22.455   27.395
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.973   26.986
 multiply_cannon_metrocomm3        1332  9.7    0.006    0.007   11.747   21.034
 make_m2s                           222  7.7    0.006    0.007   15.187   15.867
 make_images                        222  8.7    1.569    1.921   15.157   15.839
 dbcsr_mm_accdrv_process           4041 10.4    0.288    0.453    8.812   10.350
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.405    9.920    8.405    9.920
 make_images_data                   222  9.7    0.004    0.004    8.754    9.687
 hybrid_alltoall_any                227 10.6    0.518    2.422    8.158    9.142
 multiply_cannon_metrocomm4        1221  9.7    0.006    0.008    3.229    7.832
 mp_irecv_dv                       3311 11.0    3.209    7.781    3.209    7.781
 mp_sum_l                           807  5.4    5.131    7.734    5.131    7.734
 calculate_norms                   2376  9.8    5.987    6.711    5.987    6.711
 multiply_cannon_sync_h2d          1332  9.7    4.839    6.123    4.839    6.123
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.936    6.034
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.011    5.215
 arnoldi_extremal                     4  6.8    0.000    0.000    4.656    4.677
 arnoldi_normal_ev                    4  7.8    0.001    0.005    4.656    4.677
 build_subspace                      16  8.4    0.014    0.021    4.400    4.404
 ls_scf_post                          1  4.0    0.000    0.000    3.524    3.528
 dbcsr_matrix_vector_mult           304  9.0    0.009    0.021    3.178    3.416
 ls_scf_store_result                  1  5.0    0.000    0.000    3.231    3.347
 dbcsr_matrix_vector_mult_local     304 10.0    2.781    3.283    2.783    3.285
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    1.171    2.953
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.615    2.713
 mp_allgather_i34                   111  8.7    0.974    2.421    0.974    2.421
 make_images_pack                   222  9.7    2.028    2.406    2.030    2.408
 dbcsr_sort_data                    436 11.2    1.809    2.024    1.809    2.024
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.881    1.883
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.868    1.870
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    1.868    1.870
 dbcsr_data_new                    4174 10.1    1.608    1.830    1.608    1.830
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=90.632000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1713.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.701648E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265470.
 MP_Allreduce         3058                  11181.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.030    0.044   94.321   94.322
 qs_energies                          1  2.0    0.000    0.000   93.730   93.739
 ls_scf                               1  3.0    0.000    0.001   92.328   92.337
 dbcsr_multiply_generic             111  6.7    0.015    0.016   76.674   76.964
 ls_scf_main                          1  4.0    0.000    0.009   57.665   57.674
 multiply_cannon                    111  7.7    0.045    0.137   52.813   56.508
 multiply_cannon_loop               111  8.7    0.099    0.105   49.294   53.368
 density_matrix_trs4                  2  5.0    0.002    0.004   51.657   51.835
 mp_waitall_1                      7281 11.0   23.936   33.951   23.936   33.951
 ls_scf_init_scf                      1  4.0    0.000    0.004   31.095   31.124
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   29.526   29.606
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.141   27.155
 multiply_cannon_multrec            888  9.7   12.668   15.404   21.173   24.521
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   10.886   23.305
 make_m2s                           222  7.7    0.006    0.007   17.128   18.389
 make_images                        222  8.7    1.970    2.277   17.089   18.350
 hybrid_alltoall_any                227 10.6    0.622    2.861    9.441   10.843
 make_images_data                   222  9.7    0.003    0.004    9.779   10.716
 dbcsr_mm_accdrv_process           3754 10.4    0.251    0.411    8.042    9.221
 mp_sum_l                           807  5.4    5.170    9.034    5.170    9.034
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.673    8.811    7.673    8.811
 multiply_cannon_sync_h2d           888  9.7    5.940    7.738    5.940    7.738
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.473    7.171
 mp_irecv_dv                       2335 11.1    2.458    7.125    2.458    7.125
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.970    7.083
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.897    7.041
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.923    5.112
 arnoldi_extremal                     4  6.8    0.000    0.000    5.061    5.080
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.061    5.080
 build_subspace                      16  8.4    0.014    0.020    4.754    4.761
 calculate_norms                   1584  9.8    4.318    4.715    4.318    4.715
 mp_allgather_i34                   111  8.7    1.366    3.830    1.366    3.830
 dbcsr_matrix_vector_mult           304  9.0    0.009    0.020    3.464    3.775
 dbcsr_matrix_vector_mult_local     304 10.0    3.058    3.618    3.060    3.619
 ls_scf_post                          1  4.0    0.001    0.011    3.568    3.579
 ls_scf_store_result                  1  5.0    0.000    0.000    3.309    3.393
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.863    2.976
 make_images_sizes                  222  9.7    0.000    0.000    1.019    2.234
 mp_alltoall_i44                    222 10.7    1.019    2.234    1.019    2.234
 dbcsr_sort_data                    325 11.1    1.881    2.192    1.881    2.192
 make_images_pack                   222  9.7    1.821    2.144    1.823    2.147
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.064    2.094
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.045    2.076
 qs_ks_build_kohn_sham_matrix         3  8.3    0.007    0.052    2.045    2.076
 dbcsr_data_release                9322 10.9    1.311    1.934    1.311    1.934
 dbcsr_finalize                     304  7.8    0.026    0.032    1.611    1.897
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=94.322000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2211.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.332252E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266696.
 MP_Allreduce         3058                  13371.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.043    0.077   97.044   97.046
 qs_energies                          1  2.0    0.000    0.001   96.365   96.370
 ls_scf                               1  3.0    0.000    0.001   94.693   94.697
 dbcsr_multiply_generic             111  6.7    0.016    0.019   78.324   78.548
 ls_scf_main                          1  4.0    0.000    0.002   58.821   58.823
 multiply_cannon                    111  7.7    0.048    0.091   51.586   55.631
 density_matrix_trs4                  2  5.0    0.003    0.006   52.672   52.802
 multiply_cannon_loop               111  8.7    0.115    0.124   46.596   49.468
 ls_scf_init_scf                      1  4.0    0.001    0.003   32.626   32.628
 ls_scf_init_matrix_S                 1  5.0    0.000    0.001   31.222   31.296
 mp_waitall_1                      6369 11.0   22.793   30.028   22.793   30.028
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   28.770   28.780
 multiply_cannon_multrec           1332  9.7   14.172   17.157   22.012   24.705
 make_m2s                           222  7.7    0.006    0.007   21.156   22.539
 make_images                        222  8.7    3.141    3.597   21.106   22.491
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.003    9.389   16.579
 make_images_data                   222  9.7    0.004    0.004   11.824   13.417
 hybrid_alltoall_any                227 10.6    0.798    3.781   11.207   13.073
 dbcsr_mm_accdrv_process           3641 10.4    0.236    0.405    7.479    8.993
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.106    8.576    7.106    8.576
 mp_sum_l                           807  5.4    4.029    7.043    4.029    7.043
 multiply_cannon_sync_h2d          1332  9.7    5.561    6.230    5.561    6.230
 multiply_cannon_metrocomm4        1110  9.7    0.004    0.006    2.095    5.968
 mp_irecv_dv                       3229 10.9    2.072    5.901    2.072    5.901
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.037    5.430
 arnoldi_extremal                     4  6.8    0.000    0.000    5.201    5.214
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.201    5.214
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.413    5.186
 build_subspace                      16  8.4    0.014    0.021    4.866    4.873
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.526    4.697
 calculate_norms                   2376  9.8    4.197    4.584    4.197    4.584
 mp_allgather_i34                   111  8.7    2.135    4.364    2.135    4.364
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.020    3.586    3.884
 dbcsr_matrix_vector_mult_local     304 10.0    3.198    3.707    3.200    3.709
 dbcsr_sort_data                    658 11.4    3.106    3.471    3.106    3.471
 ls_scf_post                          1  4.0    0.000    0.001    3.246    3.251
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.856    3.250
 dbcsr_merge_single_wm              555 10.7    0.538    0.676    2.847    3.242
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.064    3.117
 ls_scf_store_result                  1  5.0    0.000    0.000    3.001    3.049
 dbcsr_data_release               10477 10.7    1.577    2.410    1.577    2.410
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.142    2.144
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.118    2.120
 qs_ks_build_kohn_sham_matrix         3  8.3    0.013    0.037    2.118    2.120
 dbcsr_finalize                     304  7.8    0.049    0.061    1.799    1.964
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=97.046000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2697.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.625334E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265558.
 MP_Allreduce         3049                  15663.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.041    0.058   98.942   98.943
 qs_energies                          1  2.0    0.000    0.000   98.175   98.181
 ls_scf                               1  3.0    0.000    0.000   96.214   96.225
 dbcsr_multiply_generic             111  6.7    0.017    0.018   77.747   77.994
 ls_scf_main                          1  4.0    0.000    0.000   62.067   62.070
 multiply_cannon                    111  7.7    0.078    0.152   55.207   60.709
 density_matrix_trs4                  2  5.0    0.002    0.003   54.886   54.991
 multiply_cannon_loop               111  8.7    0.069    0.075   50.821   52.550
 mp_waitall_1                      5436 11.0   26.452   31.954   26.452   31.954
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.582   30.585
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.248   29.296
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   27.084   27.094
 multiply_cannon_multrec            444  9.7   14.082   16.501   21.102   22.790
 make_m2s                           222  7.7    0.004    0.005   17.818   20.188
 make_images                        222  8.7    3.721    4.419   17.756   20.130
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   10.785   16.133
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001    6.458   14.929
 make_images_data                   222  9.7    0.003    0.004   10.011   12.353
 hybrid_alltoall_any                227 10.6    0.789    3.750    9.854   12.279
 dbcsr_mm_accdrv_process           3003 10.4    0.175    0.341    6.727    7.862
 multiply_cannon_sync_h2d           444  9.7    6.576    7.656    6.576    7.656
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.413    7.520    6.413    7.520
 mp_allgather_i34                   111  8.7    2.629    6.997    2.629    6.997
 arnoldi_extremal                     4  6.8    0.000    0.000    5.711    5.723
 arnoldi_normal_ev                    4  7.8    0.002    0.005    5.711    5.723
 build_subspace                      16  8.4    0.015    0.020    5.325    5.337
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.543    4.667
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.706    4.502
 mp_irecv_dv                       1241 11.2    1.686    4.479    1.686    4.479
 mp_sum_l                           807  5.4    2.765    4.391    2.765    4.391
 dbcsr_matrix_vector_mult           304  9.0    0.011    0.020    4.135    4.303
 dbcsr_matrix_vector_mult_local     304 10.0    3.669    4.111    3.671    4.113
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.595    3.695
 calculate_norms                    792  9.8    3.563    3.660    3.563    3.660
 ls_scf_post                          1  4.0    0.000    0.000    3.565    3.571
 make_images_sizes                  222  9.7    0.000    0.000    1.163    3.510
 mp_alltoall_i44                    222 10.7    1.162    3.509    1.162    3.509
 ls_scf_store_result                  1  5.0    0.000    0.000    3.337    3.396
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    1.871    3.190
 dbcsr_finalize                     304  7.8    0.062    0.078    2.197    2.258
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.151    2.152
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.119    2.120
 qs_ks_build_kohn_sham_matrix         3  8.3    0.003    0.004    2.119    2.120
 dbcsr_merge_all                    275  8.9    0.473    0.522    2.049    2.096
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=98.943000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3594.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.770986E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284111.
 MP_Allreduce         3043                  21950.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.079    0.099  108.180  108.180
 qs_energies                          1  2.0    0.000    0.000  106.790  106.802
 ls_scf                               1  3.0    0.000    0.000  103.851  103.863
 dbcsr_multiply_generic             111  6.7    0.023    0.025   77.355   77.459
 ls_scf_main                          1  4.0    0.000    0.000   66.120   66.121
 density_matrix_trs4                  2  5.0    0.002    0.003   56.853   56.907
 multiply_cannon                    111  7.7    0.141    0.251   49.944   52.011
 multiply_cannon_loop               111  8.7    0.067    0.069   46.468   47.330
 ls_scf_init_scf                      1  4.0    0.000    0.000   34.084   34.085
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   32.419   32.433
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   29.600   29.614
 mp_waitall_1                      4527 11.1   22.231   26.057   22.231   26.057
 make_m2s                           222  7.7    0.005    0.005   23.845   24.849
 make_images                        222  8.7    4.585    4.967   23.739   24.741
 multiply_cannon_multrec            444  9.7   17.894   18.398   22.486   22.953
 hybrid_alltoall_any                227 10.6    1.658    3.624   12.900   15.532
 make_images_data                   222  9.7    0.003    0.003   13.108   15.530
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.542   11.043
 multiply_cannon_sync_h2d           444  9.7    8.848    8.892    8.848    8.892
 arnoldi_extremal                     4  6.8    0.000    0.000    7.344    7.361
 arnoldi_normal_ev                    4  7.8    0.004    0.010    7.344    7.361
 build_subspace                      16  8.4    0.026    0.037    6.775    6.786
 dbcsr_matrix_vector_mult           304  9.0    0.016    0.033    5.421    5.588
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    5.242    5.331
 dbcsr_matrix_vector_mult_local     304 10.0    5.014    5.316    5.016    5.318
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.003    5.245
 dbcsr_mm_accdrv_process           1814 10.4    0.277    0.322    4.423    4.547
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.122    4.249    4.122    4.249
 ls_scf_post                          1  4.0    0.000    0.000    3.647    3.658
 mp_allgather_i34                   111  8.7    1.134    3.581    1.134    3.581
 make_images_sizes                  222  9.7    0.000    0.000    1.420    3.467
 mp_alltoall_i44                    222 10.7    1.420    3.467    1.420    3.467
 ls_scf_store_result                  1  5.0    0.000    0.000    3.399    3.407
 calculate_norms                    792  9.8    3.239    3.274    3.239    3.274
 dbcsr_finalize                     304  7.8    0.082    0.089    3.080    3.149
 dbcsr_merge_all                    275  8.9    0.888    0.915    2.867    2.930
 qs_energies_init_hamiltonians        1  3.0    0.002    0.002    2.909    2.909
 dbcsr_complete_redistribute          5  7.6    1.432    1.469    2.761    2.874
 dbcsr_data_release               12724 10.6    2.327    2.864    2.327    2.864
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.586    2.588
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.411    2.531
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.519    2.521
 qs_ks_build_kohn_sham_matrix         3  8.3    0.001    0.001    2.519    2.521
 dbcsr_sort_data                    325 11.1    2.439    2.490    2.439    2.490
 dbcsr_new_transposed                 4  7.5    0.243    0.253    2.274    2.286
 dbcsr_frobenius_norm                74  6.6    2.056    2.137    2.192    2.231
 dbcsr_add_d                        103  6.2    0.000    0.000    2.129    2.202
 dbcsr_add_anytype                  103  7.2    0.859    0.891    2.129    2.202
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=108.180000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=6842.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: bc2b9ad9f307a2d1e1ed8475fc27b058679d7d07
Summary: empty
Status: OK