=== This is the CP2K Performance-Test ===


Already up to date.
Current branch master is up to date.


Already up to date.
Current branch master is up to date.

 GIT Revision: 183bfa028a10482cb2ce2952b7487d62cedfbee6


################# ARCHITECTURE FILE ##################
#!/bin/bash
#
# CP2K arch file for Cray-XC50 (Piz Daint, CSCS, GPU partition)
#
# Tested with: GNU 9.3.0, Cray-MPICH 7.7.18, Cray-libsci 20.09.1,
#              Cray-FFTW 3.3.8.10, COSMA 2.6.6, ELPA 2023.05.001,
#              HDF5 1.14.2, LIBINT 2.6.0, LIBPEXSI 1.2.0,
#              LIBXC 6.2.2, LIBVORI 220621, LIBXSMM 1.17,
#              PLUMED 2.9.0, SIRIUS 7.4.3, SPGLIB 1.16.2
#
# Usage: Source this arch file and then run make as instructed.
#        A full toolchain installation is performed by default.
#        Replace or adapt the "module add" commands below if needed.
#
# Last update: 04.10.2023
#
# Shell section of this combined bash/make file.
# When the file is sourced, bash reads the "# \" line below as an ordinary
# comment, runs the commands that follow and stops at "return", so the make
# settings further down are never seen by the shell.  GNU make treats every
# trailing backslash as a continuation of that comment and skips the whole
# section; therefore each line except the final "return" must keep its
# trailing backslash.
# Optional first argument: GCC version to load (default 9.3.0).
# "maxtasks" is not set here; its value is taken from the sourcing shell.
# The toolchain installer runs in a subshell so that a failing "cd" cannot
# leave the sourcing shell in a different directory.
# \
   if [ "${0}" = "${BASH_SOURCE}" ]; then \
      echo "ERROR: Script ${0##*/} must be sourced"; \
      echo "Usage: source ${0##*/}"; \
      exit 1; \
   fi; \
   this_file=${BASH_SOURCE##*/}; \
   if [ -n "${1}" ]; then \
      gcc_version="${1}"; \
   else \
      gcc_version="9.3.0"; \
   fi; \
   module add daint-gpu; \
   module rm PrgEnv-cray; \
   module add PrgEnv-gnu; \
   module rm gcc; \
   module add gcc/${gcc_version}; \
   module add cray-fftw/3.3.8.10; \
   module add cudatoolkit; \
   echo "Expected setup:"; \
   echo "   cray-mpich/7.7.18"; \
   echo "   craype-haswell"; \
   echo "   daint-gpu/21.09"; \
   echo "   craype/2.7.10"; \
   echo "   cray-libsci/20.09.1"; \
   echo "   PrgEnv-gnu/6.0.10"; \
   echo "   gcc/${gcc_version}"; \
   echo "   cray-fftw/3.3.8.10"; \
   echo "   cudatoolkit/11.0.2_3.38-8.1__g5b73779"; \
   module list; \
   module -f save cp2k_gpu_gnu_psmp; \
   echo "To load the required modules in your batch job script, use:"; \
   echo "   module restore cp2k_gpu_gnu_psmp"; \
   (cd tools/toolchain && ./install_cp2k_toolchain.sh --enable-cuda=yes --gpu-ver=P100 -j${maxtasks} --no-arch-files --with-gcc=system --with-libvdwxc --with-pexsi --with-plumed); \
   printf "Sourcing %s ... " "${PWD}/tools/toolchain/install/setup"; \
   source "${PWD}/tools/toolchain/install/setup"; \
   printf "done\n"; \
   echo "Check the output above for error messages and consistency!"; \
   echo; \
   echo "If everything is OK, you can build a CP2K production binary with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.}"; \
   echo; \
   echo "Alternatively, you can add further checks, e.g. for regression testing, with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} DO_CHECKS=yes"; \
   echo "or build CP2K as a library with"; \
   echo "   make -j ARCH=${this_file%.*} VERSION=${this_file##*.} libcp2k"; \
   echo; \
   return

# Set options
# DO_CHECKS and USE_ACC are yes/no switches tested by the conditionals below.
# Each USE_<LIB> value is a version string that is spliced into the library's
# directory name under INSTALL_PATH; leaving a value empty (or commenting the
# line out, as done for QUIP) skips that library's conditional section.
DO_CHECKS      := no
USE_ACC        := yes
USE_COSMA      := 2.6.6
USE_ELPA       := 2023.05.001
USE_HDF5       := 1.14.2
USE_LIBINT     := 2.6.0
USE_LIBPEXSI   := 1.2.0
USE_LIBVORI    := 220621
USE_LIBXC      := 6.2.2
USE_LIBXSMM    := 1.17
USE_PLUMED     := 2.9.0
#USE_QUIP       := 0.9.10
USE_SIRIUS     := 7.4.3
USE_SPGLIB     := 1.16.2
# Only needed for SIRIUS
LIBVDWXC_VER   := 0.4.0
SPFFT_VER      := 1.0.6
SPLA_VER       := 1.5.5
# Only needed for LIBPEXSI
SCOTCH_VER     := 6.0.0
SUPERLU_VER    := 6.1.0

# LMAX is part of the libint directory name used below; MAX_CONTR is handed
# to the compilers as -D__MAX_CONTR.
LMAX           := 5
MAX_CONTR      := 4

# NOTE(review): GPUVER and OFFLOAD_TARGET are not referenced by the settings
# visible here; presumably the including Makefile consumes them - confirm.
GPUVER         := P100
OFFLOAD_TARGET := cuda

# Compiler, linker and archiver commands.
# cc, CC, and ftn already include the proper -march flag.
AR             := ar -r
CC             := cc
CXX            := CC
FC             := ftn
LD             := ftn
OFFLOAD_CC     := nvcc

# Baseline optimisation and OpenMP flags; include paths and defines are
# appended by the sections further down.
CFLAGS         := -O2 -fopenmp -fopenmp-simd -ftree-vectorize -funroll-loops -g

# Preprocessor defines that are always set
DFLAGS         := -D__parallel -D__SCALAPACK -D__FFTW3 -D__MAX_CONTR=$(strip $(MAX_CONTR))

# Root of the toolchain installation, resolved against the PWD variable.
# NOTE(review): PWD (not CURDIR) is used; presumably intentional so that the
# path stays the same when make runs in another directory - confirm.
INSTALL_PATH   := $(PWD)/tools/toolchain/install

# Extra diagnostics, requested with DO_CHECKS=yes
ifeq ($(DO_CHECKS), yes)
   DFLAGS         += -D__CHECK_DIAG
endif

# Accelerator defines, requested with USE_ACC=yes.
# -D__NO_OFFLOAD_PW is set because there is possibly no performance gain
# with PW_CUDA currently.
ifeq ($(USE_ACC), yes)
   DFLAGS         += -D__DBCSR_ACC -D__OFFLOAD_CUDA -D__NO_OFFLOAD_PW
endif

# PLUMED: adds the define and the static library, and selects GSL 2.7,
# which is picked up by the USE_GSL section further down.
ifneq ($(USE_PLUMED),)
   USE_PLUMED     := $(strip $(USE_PLUMED))
   USE_GSL        := 2.7
   DFLAGS         += -D__PLUMED2
   PLUMED_LIB     := $(INSTALL_PATH)/plumed-$(USE_PLUMED)/lib
   LIBS           += $(PLUMED_LIB)/libplumed.a
endif

# ELPA: TARGET names the build-variant subdirectory of the ELPA installation;
# the nvidia variant additionally gets -D__ELPA_NVIDIA_GPU.
ifneq ($(USE_ELPA),)
   USE_ELPA       := $(strip $(USE_ELPA))
   TARGET         := nvidia
   ELPA_LIB       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/lib
   ELPA_INC       := $(INSTALL_PATH)/elpa-$(USE_ELPA)/$(TARGET)/include/elpa-$(USE_ELPA)
   DFLAGS         += -D__ELPA
   ifeq ($(TARGET), nvidia)
      DFLAGS         += -D__ELPA_NVIDIA_GPU
   endif
   CFLAGS         += -I$(ELPA_INC)/elpa -I$(ELPA_INC)/modules
   LIBS           += $(ELPA_LIB)/libelpa.a
endif

# QUIP: only active when USE_QUIP is set (it is commented out in the options
# section); links the QUIP core, libatoms and the FoX libraries.
ifneq ($(USE_QUIP),)
   USE_QUIP       := $(strip $(USE_QUIP))
   QUIP_LIB       := $(INSTALL_PATH)/quip-$(USE_QUIP)/lib
   QUIP_INC       := $(INSTALL_PATH)/quip-$(USE_QUIP)/include
   DFLAGS         += -D__QUIP
   CFLAGS         += -I$(QUIP_INC)
   LIBS           += $(QUIP_LIB)/libquip_core.a \
                     $(QUIP_LIB)/libatoms.a \
                     $(QUIP_LIB)/libFoX_sax.a \
                     $(QUIP_LIB)/libFoX_common.a \
                     $(QUIP_LIB)/libFoX_utils.a \
                     $(QUIP_LIB)/libFoX_fsys.a
endif

# LIBPEXSI together with the SuperLU_DIST and SCOTCH libraries it is linked
# with; their versions come from SUPERLU_VER and SCOTCH_VER.
ifneq ($(USE_LIBPEXSI),)
   USE_LIBPEXSI   := $(strip $(USE_LIBPEXSI))
   SUPERLU_VER    := $(strip $(SUPERLU_VER))
   SCOTCH_VER     := $(strip $(SCOTCH_VER))
   LIBPEXSI_LIB   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/lib
   LIBPEXSI_INC   := $(INSTALL_PATH)/pexsi-$(USE_LIBPEXSI)/include
   SUPERLU_LIB    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/lib
   SUPERLU_INC    := $(INSTALL_PATH)/superlu_dist-$(SUPERLU_VER)/include
   SCOTCH_LIB     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/lib
   SCOTCH_INC     := $(INSTALL_PATH)/scotch-$(SCOTCH_VER)/include
   DFLAGS         += -D__LIBPEXSI
   CFLAGS         += -I$(LIBPEXSI_INC) -I$(SCOTCH_INC) -I$(SUPERLU_INC)
   LIBS           += $(LIBPEXSI_LIB)/libpexsi.a \
                     $(SUPERLU_LIB)/libsuperlu_dist.a \
                     $(SCOTCH_LIB)/libptscotchparmetis.a \
                     $(SCOTCH_LIB)/libptscotch.a \
                     $(SCOTCH_LIB)/libptscotcherr.a \
                     $(SCOTCH_LIB)/libscotchmetis.a \
                     $(SCOTCH_LIB)/libscotch.a
endif

# LIBVORI: define plus static library, no include path is added
ifneq ($(USE_LIBVORI),)
   USE_LIBVORI    := $(strip $(USE_LIBVORI))
   DFLAGS         += -D__LIBVORI
   LIBVORI_LIB    := $(INSTALL_PATH)/libvori-$(USE_LIBVORI)/lib
   LIBS           += $(LIBVORI_LIB)/libvori.a
endif

# LIBXC: libxcf03.a is listed ahead of libxc.a on the link line
ifneq ($(USE_LIBXC),)
   USE_LIBXC      := $(strip $(USE_LIBXC))
   LIBXC_LIB      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/lib
   LIBXC_INC      := $(INSTALL_PATH)/libxc-$(USE_LIBXC)/include
   DFLAGS         += -D__LIBXC
   CFLAGS         += -I$(LIBXC_INC)
   LIBS           += $(LIBXC_LIB)/libxcf03.a \
                     $(LIBXC_LIB)/libxc.a
endif

# LIBINT: the installation directory name carries both the library version
# and the LMAX value
ifneq ($(USE_LIBINT),)
   USE_LIBINT     := $(strip $(USE_LIBINT))
   LMAX           := $(strip $(LMAX))
   LIBINT_LIB     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/lib
   LIBINT_INC     := $(INSTALL_PATH)/libint-v$(USE_LIBINT)-cp2k-lmax-$(LMAX)/include
   DFLAGS         += -D__LIBINT
   CFLAGS         += -I$(LIBINT_INC)
   LIBS           += $(LIBINT_LIB)/libint2.a
endif

# SPGLIB: include path, define and the static libsymspg.a
ifneq ($(USE_SPGLIB),)
   USE_SPGLIB     := $(strip $(USE_SPGLIB))
   SPGLIB_LIB     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/lib
   SPGLIB_INC     := $(INSTALL_PATH)/spglib-$(USE_SPGLIB)/include
   DFLAGS         += -D__SPGLIB
   CFLAGS         += -I$(SPGLIB_INC)
   LIBS           += $(SPGLIB_LIB)/libsymspg.a
endif

# LIBXSMM: libxsmmf.a is listed ahead of libxsmm.a on the link line
ifneq ($(USE_LIBXSMM),)
   USE_LIBXSMM    := $(strip $(USE_LIBXSMM))
   LIBXSMM_LIB    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/lib
   LIBXSMM_INC    := $(INSTALL_PATH)/libxsmm-$(USE_LIBXSMM)/include
   DFLAGS         += -D__LIBXSMM
   CFLAGS         += -I$(LIBXSMM_INC)
   LIBS           += $(LIBXSMM_LIB)/libxsmmf.a \
                     $(LIBXSMM_LIB)/libxsmm.a
endif

# SIRIUS together with libvdwxc, SpFFT and SpLA.
# With USE_ACC=yes the "cuda" subdirectories of SpFFT, SpLA and SIRIUS are
# used and -D__OFFLOAD_GEMM is added; otherwise the plain directories are used.
ifneq ($(USE_SIRIUS),)
   USE_SIRIUS     := $(strip $(USE_SIRIUS))
   LIBVDWXC_VER   := $(strip $(LIBVDWXC_VER))
   SPFFT_VER      := $(strip $(SPFFT_VER))
   SPLA_VER       := $(strip $(SPLA_VER))
   LIBVDWXC_INC   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/include
   LIBVDWXC_LIB   := $(INSTALL_PATH)/libvdwxc-$(LIBVDWXC_VER)/lib
   SPFFT_INC      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/include
   SPLA_INC       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/include/spla
   ifeq ($(USE_ACC), yes)
      DFLAGS         += -D__OFFLOAD_GEMM
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include/cuda
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib/cuda
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib/cuda
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib/cuda
   else
      SIRIUS_INC     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/include
      SIRIUS_LIB     := $(INSTALL_PATH)/sirius-$(USE_SIRIUS)/lib
      SPFFT_LIB      := $(INSTALL_PATH)/SpFFT-$(SPFFT_VER)/lib
      SPLA_LIB       := $(INSTALL_PATH)/SpLA-$(SPLA_VER)/lib
   endif
   CFLAGS         += -I$(LIBVDWXC_INC) -I$(SPFFT_INC) -I$(SPLA_INC) -I$(SIRIUS_INC)
   DFLAGS         += -D__LIBVDWXC -D__SPFFT -D__SPLA -D__SIRIUS
   LIBS           += $(SIRIUS_LIB)/libsirius.a \
                     $(SPLA_LIB)/libspla.a \
                     $(SPFFT_LIB)/libspfft.a \
                     $(LIBVDWXC_LIB)/libvdwxc.a
endif

# HDF5: Fortran, high-level and core libraries, listed in that order
ifneq ($(USE_HDF5),)
   USE_HDF5       := $(strip $(USE_HDF5))
   HDF5_LIB       := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/lib
   HDF5_INC       := $(INSTALL_PATH)/hdf5-$(USE_HDF5)/include
   DFLAGS         += -D__HDF5
   CFLAGS         += -I$(HDF5_INC)
   LIBS           += $(HDF5_LIB)/libhdf5_fortran.a \
                     $(HDF5_LIB)/libhdf5_hl.a \
                     $(HDF5_LIB)/libhdf5.a
endif

# COSMA: with USE_ACC=yes the version string gets a "-cuda" suffix, so the
# paths below point at the COSMA-<version>-cuda installation directory
ifneq ($(USE_COSMA),)
   USE_COSMA      := $(strip $(USE_COSMA))
   ifeq ($(USE_ACC), yes)
      USE_COSMA      := $(USE_COSMA)-cuda
   endif
   COSMA_LIB      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/lib
   COSMA_INC      := $(INSTALL_PATH)/COSMA-$(USE_COSMA)/include
   DFLAGS         += -D__COSMA
   CFLAGS         += -I$(COSMA_INC)
   LIBS           += $(COSMA_LIB)/libcosma_prefixed_pxgemm.a \
                     $(COSMA_LIB)/libcosma.a \
                     $(COSMA_LIB)/libcosta.a \
                     $(COSMA_LIB)/libTiled-MM.a
endif

# GSL: USE_GSL is not part of the options section; it is assigned inside the
# PLUMED section above
ifneq ($(USE_GSL),)
   USE_GSL        := $(strip $(USE_GSL))
   GSL_LIB        := $(INSTALL_PATH)/gsl-$(USE_GSL)/lib
   GSL_INC        := $(INSTALL_PATH)/gsl-$(USE_GSL)/include
   DFLAGS         += -D__GSL
   CFLAGS         += -I$(GSL_INC)
   LIBS           += $(GSL_LIB)/libgsl.a
endif

# Append all collected defines to the C flags
CFLAGS         += $(DFLAGS)

# The C++ and Fortran flags both start from the complete C flags
CXXFLAGS       := $(CFLAGS) -std=c++11
FCFLAGS        := $(CFLAGS)

# Flags for the offload compiler; the architecture is given literally as
# sm_60 and is not derived from GPUVER
OFFLOAD_FLAGS  := $(DFLAGS) -O3 -Xcompiler="-fopenmp" -arch sm_60 --std=c++11

# -fallow-argument-mismatch is added only if the major version printed by
# "gcc -dumpversion" is greater than 9
ifeq ($(shell [ $(shell gcc -dumpversion | cut -d. -f1) -gt 9 ] && echo yes), yes)
   FCFLAGS        += -fallow-argument-mismatch
endif
FCFLAGS        += -fbacktrace -ffree-form -ffree-line-length-none -fno-omit-frame-pointer -std=f2008

# Linker flags are the Fortran flags; if CUDA_HOME is set, its lib64
# directory is added as library search path and as rpath
LDFLAGS        := $(FCFLAGS)
ifneq ($(CUDA_HOME),)
   CUDA_LIB       := $(CUDA_HOME)/lib64
   LDFLAGS        += -L$(CUDA_LIB) -Wl,-rpath=$(CUDA_LIB)
endif

# CUDA libraries followed by the system libraries
LIBS           += -lcusolver -lcudart -lnvrtc -lcuda -lcufft -lcublas -lrt \
                  -lz -ldl -lpthread -lstdc++

# End
############### END ARCHITECTURE FILE ################


===== TESTS (description) =====
 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-RPA.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-dRPA-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/01
 job id: 50175986
 --- Point ---
 name: 10
 plot: h2o_32_ri_rpa_mp2
 regex: Total RI-RPA Time= 
 label: RI-RPA (8n/2r/6t)
 --- Point ---
 name: 11
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-RPA (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 RI-RPA/RI-MP2 correlation energy
 input file: benchmarks/QS_mp2_rpa/32-H2O/RI-MP2.inp
 required files: ['benchmarks/QS_mp2_rpa/32-H2O/BASIS_H2O', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32.xyz', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-PBE-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-HF-TZ.inp', 'benchmarks/QS_mp2_rpa/32-H2O/H2O-32-RI-MP2-TZ.inp']
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 15
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/02
 job id: 50175987
 --- Point ---
 name: 20
 plot: h2o_32_ri_rpa_mp2
 regex: Total MP2 Time= 
 label: RI-MP2 (8n/6r/2t)
 --- Point ---
 name: 21
 plot: h2o_32_ri_rpa_mp2_mem
 regex: Estimated peak process memory 
 label: RI-MP2 (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/03
 job id: 50175988
 --- Point ---
 name: 100
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 101
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/04
 job id: 50175989
 --- Point ---
 name: 102
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 103
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/05
 job id: 50175990
 --- Point ---
 name: 104
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 105
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/06
 job id: 50175991
 --- Point ---
 name: 106
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 107
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/07
 job id: 50175992
 --- Point ---
 name: 108
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 109
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-64 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-64.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 5
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/08
 job id: 50175993
 --- Point ---
 name: 110
 plot: h2o_64_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 111
 plot: h2o_64_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/09
 job id: 50175994
 --- Point ---
 name: 200
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 201
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/10
 job id: 50175995
 --- Point ---
 name: 202
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 203
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/11
 job id: 50175996
 --- Point ---
 name: 204
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 205
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/12
 job id: 50175997
 --- Point ---
 name: 206
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 207
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/13
 job id: 50175998
 --- Point ---
 name: 208
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 209
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-128 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-128.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/14
 job id: 50175999
 --- Point ---
 name: 210
 plot: h2o_128_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 211
 plot: h2o_128_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/15
 job id: 50176000
 --- Point ---
 name: 400
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 401
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/16
 job id: 50176001
 --- Point ---
 name: 402
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 403
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/17
 job id: 50176002
 --- Point ---
 name: 404
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 405
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/18
 job id: 50176003
 --- Point ---
 name: 406
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 407
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/19
 job id: 50176004
 --- Point ---
 name: 408
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 409
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-256 test - DBCSR dominated (MPI/OMP)
 input file: benchmarks/QS/H2O-256.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 30
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/20
 job id: 50176005
 --- Point ---
 name: 410
 plot: h2o_256_md
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 411
 plot: h2o_256_md_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/21
 job id: 50176006
 --- Point ---
 name: 500
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/12r/1t)
 --- Point ---
 name: 501
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 6
 # threads/rank = 2
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/22
 job id: 50176007
 --- Point ---
 name: 502
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/6r/2t)
 --- Point ---
 name: 503
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/6r/2t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 4
 # threads/rank = 3
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/23
 job id: 50176008
 --- Point ---
 name: 504
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/4r/3t)
 --- Point ---
 name: 505
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/4r/3t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 3
 # threads/rank = 4
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/24
 job id: 50176009
 --- Point ---
 name: 506
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/3r/4t)
 --- Point ---
 name: 507
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/3r/4t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 2
 # threads/rank = 6
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/25
 job id: 50176010
 --- Point ---
 name: 508
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/2r/6t)
 --- Point ---
 name: 509
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/2r/6t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: H2O-32 (NREP 3) linear scaling test (864 H2O)
 input file: benchmarks/QS_DM_LS/H2O-dft-ls.inp
 required files: []
 output file: result.log
 # nodes = 8
 # ranks/node = 1
 # threads/rank = 12
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/26
 job id: 50176011
 --- Point ---
 name: 510
 plot: h2o_32_nrep3_ls
 regex: CP2K  
 label: (8n/1r/12t)
 --- Point ---
 name: 511
 plot: h2o_32_nrep3_ls_mem
 regex: Estimated peak process memory 
 label: (8n/1r/12t)
 ~~~~~~~ END TEST ~~~~~~~

 ~~~~~~~~~ TEST ~~~~~~~~~
 description: 512 H2O (4 NVE MD steps on 64 nodes)
 input file: benchmarks/QS/00512_H2O/H2O-512_md.inp
 required files: []
 output file: result.log
 # nodes = 64
 # ranks/node = 12
 # threads/rank = 1
 nrepeat = 1
 time[min] = 10
 run dir: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/27
 job id: 50176012
 --- Point ---
 name: 601
 plot: h2o_512_md
 regex: CP2K  
 label: (64n/12r/1t)
 --- Point ---
 name: 602
 plot: h2o_512_md_mem
 regex: Estimated peak process memory 
 label: (64n/12r/1t)
 ~~~~~~~ END TEST ~~~~~~~

=== END TESTS (description) ===


===== PLOTS (description) =====
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_ri_rpa_mp2_mem", title="32 H2O molecules (RI-MP2, RI-RPA)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_64_md_mem", title="64 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_128_md_mem", title="128 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_256_md_mem", title="256 H2O molecules (10 MD steps)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_32_nrep3_ls_mem", title="864 H2O molecules (LS SCF)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Time [s]"
 ~~~~~~~~~ PLOT ~~~~~~~~~
Plot: name="h2o_512_md_mem", title="512 H2O (4 NVE MD steps on 64 nodes)", xlabel="Revision", ylabel="Est. peak process memory [MiB]"
=== END PLOTS (description) ===


============ RESULTS ============
 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/01/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               15                 177869.
 MP_Allreduce          424                      8.
 MP_Sync                 3
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.017    0.033  134.384  134.384
 farming_run                          1  2.0  133.678  133.689  134.352  134.355
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32              4194304       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            154140672       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            159645696       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            208732160       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            212860928       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            212860928       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            227352576       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         896801644032       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         928925089792       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         928925089792       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         962100985856       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693169221632       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753639550976       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.164741E+12       0.0%      0.0%    100.0%
 flops max/rank                    447.801317E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249492158       0.0%      0.0%    100.0%
 number of processed stacks                164328       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1518.3
 marketing flops                     7.165779E+12
 -------------------------------------------------------------------------------
 # multiplications                           1160
 max memory usage/rank               1.488757E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                    2592
 MPI messages size (bytes):
  total size                         1.140326E+09
  min size                           0.000000E+00
  max size                           1.663488E+06
  average size                     439.940750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 132                        0
       128 < size <=     8192                 348                  2850816
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1536                179306496
    131072 < size <=  4194304                 576                958169088
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         2308                     54.
 MP_Alltoall          4670                 822215.
 MP_ISend             2604                  90577.
 MP_IRecv             2604                  90574.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              230                1134128.
 MP_Allreduce          571                1938539.
 MP_Sync                25
 MP_Alltoall            38                9316958.
 MP_SendRecv           120                 384007.
 MP_ISendRecv           45                 235435.
 MP_Wait               191
 MP_comm_split          10
 MP_ISend              127                3867574.
 MP_IRecv              127                3866554.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.007    0.028  115.416  115.416
 qs_energies                          1  2.0    0.000    0.000  115.127  115.130
 mp2_main                             1  3.0    0.000    0.000  113.066  113.069
 mp2_gpw_main                         1  4.0    0.027    0.033  112.068  112.071
 mp2_ri_gpw_compute_in                1  5.0    0.173    0.174   92.742   93.339
 mp2_ri_gpw_compute_in_loop           1  6.0    0.004    0.004   54.586   55.183
 mp2_eri_3c_integrate_gpw           272  7.0    0.152    0.166   40.997   46.281
 get_2c_integrals                     1  6.0    0.008    0.009   37.050   37.982
 integrate_v_rspace                 273  8.0    0.436    0.451   24.594   29.587
 pw_transfer                       6555 10.6    0.379    0.398   26.812   27.461
 fft_wrap_pw1pw2                   5465 11.4    0.045    0.048   25.441   26.172
 grid_integrate_task_list           273  9.0   20.509   25.959   20.509   25.959
 fft_wrap_pw1pw2_100               2178 12.4    0.027    0.028   22.981   23.696
 rpa_ri_compute_en                    1  5.0    0.019    0.021   19.223   19.488
 compute_2c_integrals                 1  7.0    0.002    0.002   19.175   19.177
 compute_2c_integrals_loop_lm         1  8.0    0.001    0.003   18.597   18.912
 mp2_eri_2c_integrate_gpw             1  9.0    2.381    2.415   18.595   18.911
 cp_fm_cholesky_decompose            12  8.2   17.783   18.732   17.783   18.732
 cholesky_decomp                      1  7.0    0.000    0.000   16.707   17.659
 fft3d_s                           5443 13.4   16.122   16.643   16.143   16.664
 ao_to_mo_and_store_B_mult_1        272  7.0   10.765   15.332   10.765   15.332
 calculate_wavefunction             272  8.0    5.399    5.553   12.316   13.003
 rpa_num_int                          1  6.0    0.001    0.011   10.590   10.590
 rpa_num_int_RPA_matrix_operati       8  7.0    0.000    0.000   10.576   10.583
 calc_mat_Q                           8  8.0    0.000    0.000    9.452    9.537
 contract_S_to_Q                      8  9.0    0.000    0.000    8.874    8.962
 calc_potential_gpw                 544  9.5    0.004    0.005    8.192    8.639
 parallel_gemm_fm                    14  9.1    0.000    0.000    8.457    8.556
 parallel_gemm_fm_cosma              14 10.1    8.457    8.556    8.457    8.556
 mp2_eri_2c_integrate_gpw_pot_l     272 10.0    0.001    0.001    8.070    8.336
 potential_pw2rs                    545 10.0    0.106    0.108    7.510    8.133
 create_integ_mat                     1  6.0    0.022    0.029    7.945    7.945
 collocate_single_gaussian          272 10.0    0.039    0.042    7.342    7.630
 array2fm                             1  7.0    0.000    0.000    6.747    7.338
 pw_scatter_s                      2720 13.7    4.332    4.488    4.332    4.488
 pw_gather_s                       2722 13.2    3.457    3.782    3.457    3.782
 array2fm_buffer_send                 1  8.0    3.023    3.187    3.023    3.187
 pw_poisson_solve                   545 10.5    1.119    1.181    2.194    2.347
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="10", plot="h2o_32_ri_rpa_mp2", label="RI-RPA (8n/2r/6t)", y=112.067373, yerr=0.000000
PlotPoint: name="11", plot="h2o_32_ri_rpa_mp2_mem", label="RI-RPA (8n/2r/6t)", y=2808.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/02/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         0.000000E+00       0.0%      0.0%      0.0%
 flops max/rank                      0.000000E+00       0.0%      0.0%      0.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                                  0       0.0%      0.0%      0.0%
 number of processed stacks                     0       0.0%      0.0%      0.0%
 average stack size                                     0.0       0.0       0.0
 marketing flops                     0.000000E+00
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                1                     12.
 MP_Allreduce           19                     21.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               22                 205321.
 MP_Allreduce          424                      9.
 MP_Sync                 4
 MP_comm_split           1
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.037    0.042  419.980  419.982
 farming_run                          1  2.0  419.175  419.182  419.925  419.928
 -------------------------------------------------------------------------------


 @@@@@@@@@@ Run number: 2 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32             16777216       0.0%      0.0%    100.0%
 flops    14 x    32 x    32            565182464       0.0%      0.0%    100.0%
 flops    29 x    32 x    32            585367552       0.0%      0.0%    100.0%
 flops    14 x    14 x    32            626196480       0.0%      0.0%    100.0%
 flops    29 x    14 x    32            638582784       0.0%      0.0%    100.0%
 flops    14 x    29 x    32            638582784       0.0%      0.0%    100.0%
 flops    29 x    29 x    32            682057728       0.0%      0.0%    100.0%
 flops    14 x    32 x    14         897827128576       0.0%      0.0%    100.0%
 flops    29 x    32 x    14         929989394432       0.0%      0.0%    100.0%
 flops    14 x    32 x    29         929989394432       0.0%      0.0%    100.0%
 flops    29 x    32 x    29         963203301376       0.0%      0.0%    100.0%
 flops    32 x    32 x    14        1693481172992       0.0%      0.0%    100.0%
 flops    32 x    32 x    29        1753962643456       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         7.172206E+12       0.0%      0.0%    100.0%
 flops max/rank                    150.696064E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          249788821       0.0%      0.0%    100.0%
 number of processed stacks                 98736       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    2529.9
 marketing flops                     7.174951E+12
 -------------------------------------------------------------------------------
 # multiplications                           1140
 max memory usage/rank               1.249559E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   61440
 MPI messages size (bytes):
  total size                         6.073508E+09
  min size                           0.000000E+00
  max size                         642.960000E+03
  average size                      98.852664E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               32004                        0
       128 < size <=     8192                1820                 14909440
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072               18640               1081442304
    131072 < size <=  4194304                8976               4977156096
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         1003                     44.
 MP_Alltoall          1797                 713538.
 MP_ISend             3686                  54943.
 MP_IRecv             3622                  54292.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group               12
 MP_Bcast              757                 478553.
 MP_Allreduce         2021                  21391.
 MP_Sync                37
 MP_Alltoall            77               14112249.
 MP_SendRecv          2876                2171486.
 MP_ISendRecv         1034                 172620.
 MP_Wait              1346
 MP_comm_split           7
 MP_ISend              264                 362227.
 MP_IRecv              264                 362718.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.034  217.088  217.089
 qs_energies                          1  2.0    0.002    0.017  216.869  216.877
 scf_env_do_scf                       1  3.0    0.000    0.000  114.971  114.972
 qs_ks_update_qs_env                  5  5.0    0.000    0.000  114.069  114.076
 rebuild_ks_matrix                    4  6.0    0.000    0.000  114.068  114.075
 qs_ks_build_kohn_sham_matrix         4  7.0    0.054    0.060  114.068  114.075
 hfx_ks_matrix                        4  8.0    0.001    0.001  113.716  113.720
 integrate_four_center                4  9.0    0.152    0.463  113.715  113.720
 integrate_four_center_main           4 10.0    0.115    0.519  101.719  105.634
 integrate_four_center_bin          263 11.0  101.604  105.506  101.604  105.506
 mp2_main                             1  3.0    0.004    0.031  101.604  101.614
 mp2_gpw_main                         1  4.0    0.055    0.106  100.707  100.720
 init_scf_loop                        1  4.0    0.000    0.000   97.243   97.243
 mp2_ri_gpw_compute_in                1  5.0    0.067    0.115   73.904   74.899
 mp2_ri_gpw_compute_in_loop           1  6.0    0.002    0.002   53.686   54.697
 mp2_eri_3c_integrate_gpw            91  7.0    0.143    0.163   41.257   46.439
 integrate_v_rspace                  95  8.0    0.396    0.573   27.745   32.787
 pw_transfer                       2240 10.6    0.144    0.165   29.525   29.963
 fft_wrap_pw1pw2                   1868 11.4    0.017    0.020   28.529   28.961
 ao_to_mo_and_store_B_mult_1         91  7.0   10.759   28.742   10.759   28.742
 mp2_ri_gpw_compute_en                1  5.0    0.054    0.065   26.597   28.614
 grid_integrate_task_list            95  9.0   23.161   28.415   23.161   28.415
 fft_wrap_pw1pw2_100                730 12.4    0.012    0.013   26.270   26.681
 mp2_ri_gpw_compute_en_RI_loop        1  6.0    1.838    1.901   24.937   24.947
 get_2c_integrals                     1  6.0    0.001    0.011   20.129   20.153
 compute_2c_integrals                 1  7.0    0.005    0.023   19.104   19.112
 compute_2c_integrals_loop_lm         1  8.0    0.001    0.002   18.681   18.944
 mp2_eri_2c_integrate_gpw             1  9.0    1.727    1.841   18.680   18.943
 fft3d_s                           1823 13.4   18.481   18.905   18.494   18.919
 scf_env_do_scf_inner_loop            4  4.0    0.000    0.001   17.727   17.727
 calculate_wavefunction              91  8.0    2.011    2.041    9.616    9.838
 mp2_ri_gpw_compute_en_expansio     172  7.0    0.555    0.584    8.798    9.420
 potential_pw2rs                    186 10.0    0.033    0.034    8.451    8.935
 local_gemm                         172  8.0    8.243    8.850    8.243    8.850
 mp2_eri_2c_integrate_gpw_pot_l      91 10.0    0.001    0.001    8.190    8.567
 mp2_ri_gpw_compute_en_comm          22  7.0    0.501    0.524    7.918    8.317
 calc_potential_gpw                 182  9.5    0.002    0.002    7.907    8.158
 collocate_single_gaussian           91 10.0    0.017    0.034    7.776    8.010
 mp_sync                             37 10.5    3.973    7.960    3.973    7.960
 integrate_four_center_load           4 10.0    0.000    0.000    6.760    6.763
 hfx_load_balance                     1 11.0    0.000    0.000    6.760    6.763
 mp2_ri_gpw_compute_en_ener         172  7.0    6.339    6.407    6.339    6.407
 mp_sendrecv_dm3                   2068  8.0    5.945    6.343    5.945    6.343
 pw_gather_s                        912 13.2    4.481    4.961    4.481    4.961
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="20", plot="h2o_32_ri_rpa_mp2", label="RI-MP2 (8n/6r/2t)", y=100.693406, yerr=0.000000
PlotPoint: name="21", plot="h2o_32_ri_rpa_mp2_mem", label="RI-MP2 (8n/6r/2t)", y=1509.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/03/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     29.277748E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               5055360       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      29.1
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             453.496832E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 9436608
 MPI messages size (bytes):
  total size                       333.233553E+09
  min size                           0.000000E+00
  max size                         315.840000E+03
  average size                      35.312852E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             4913240                        0
       128 < size <=     8192             1155432               9465298944
      8192 < size <=    32768             1984512              54190407680
     32768 < size <=   131072              551296              42776657920
    131072 < size <=  4194304              832128             226802306368
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3683                  62379.
 MP_Allreduce        10329                    270.
 MP_Sync               530
 MP_Alltoall          2083                 592243.
 MP_SendRecv         22610                   5520.
 MP_ISendRecv        22610                   5520.
 MP_Wait             37876
 MP_comm_split          50
 MP_ISend            20771                  42672.
 MP_IRecv            20771                  42672.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.008    0.027   53.949   53.950
 qs_mol_dyn_low                       1  2.0    0.003    0.004   53.747   53.756
 qs_forces                           11  3.9    0.002    0.002   53.674   53.674
 qs_energies                         11  4.9    0.006    0.011   52.174   52.181
 scf_env_do_scf                      11  5.9    0.000    0.001   45.921   45.922
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   43.874   43.874
 qs_scf_new_mos                     108  7.5    0.000    0.001   33.863   34.135
 qs_scf_loop_do_ot                  108  8.5    0.000    0.001   33.863   34.134
 dbcsr_multiply_generic            2286 12.5    0.094    0.098   33.498   33.974
 ot_scf_mini                        108  9.5    0.002    0.002   32.164   32.394
 multiply_cannon                   2286 13.5    0.189    0.199   26.223   28.048
 multiply_cannon_loop              2286 14.5    1.809    1.917   25.553   27.394
 velocity_verlet                     10  3.0    0.004    0.011   26.044   26.045
 ot_mini                            108 10.5    0.001    0.001   19.125   19.399
 qs_ot_get_derivative               108 11.5    0.001    0.001   16.130   16.335
 mp_waitall_1                    245248 16.5    8.250   14.809    8.250   14.809
 multiply_cannon_metrocomm3       54864 15.5    0.072    0.078    5.916   13.071
 multiply_cannon_multrec          54864 15.5    3.649    5.809    7.797   11.280
 qs_ot_get_p                        119 10.4    0.001    0.001    8.382    8.685
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.857    8.011
 qs_ks_build_kohn_sham_matrix       119  9.3    0.010    0.011    7.857    8.011
 mp_sum_l                          7287 12.8    5.324    7.074    5.324    7.074
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.900    7.036
 multiply_cannon_sync_h2d         54864 15.5    5.182    6.069    5.182    6.069
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    5.576    6.042
 dbcsr_mm_accdrv_process          76910 16.1    1.837    2.896    4.062    5.791
 qs_ot_p2m_diag                      50 11.0    0.004    0.006    5.443    5.474
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    5.316    5.431
 init_scf_run                        11  5.9    0.000    0.001    4.916    4.917
 scf_env_initial_rho_setup           11  6.9    0.002    0.007    4.916    4.916
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    4.594    4.594
 sum_up_and_integrate               119 10.3    0.001    0.002    4.460    4.468
 integrate_v_rspace                 119 11.3    0.002    0.002    4.449    4.459
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    4.380    4.380
 cp_fm_redistribute_end              50 14.0    2.232    4.351    2.239    4.354
 qs_rho_update_rho_low              119  7.7    0.000    0.001    4.115    4.250
 cp_fm_diag_elpa_base                50 14.0    2.109    4.246    2.114    4.250
 calculate_rho_elec                 119  8.7    0.011    0.017    4.115    4.249
 calculate_dm_sparse                119  9.5    0.000    0.000    3.036    3.193
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.928    3.126
 apply_single                       119 13.6    0.000    0.000    2.927    3.125
 multiply_cannon_metrocomm1       54864 15.5    0.055    0.060    1.683    3.058
 jit_kernel_multiply                 13 15.8    2.157    2.799    2.157    2.799
 acc_transpose_blocks             54864 15.5    0.223    0.245    2.252    2.792
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.728    2.733
 ot_diis_step                       108 11.5    0.006    0.006    2.719    2.720
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    2.447    2.516
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.416    2.418
 density_rs2pw                      119  9.7    0.004    0.004    2.146    2.257
 grid_integrate_task_list           119 12.3    2.024    2.120    2.024    2.120
 wfi_extrapolate                     11  7.9    0.001    0.001    2.118    2.118
 mp_sum_d                          4137 12.0    1.363    2.092    1.363    2.092
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.000    2.054
 init_scf_loop                       11  6.9    0.000    0.000    2.031    2.031
 potential_pw2rs                    119 12.3    0.004    0.004    1.820    1.834
 pw_transfer                       1439 11.6    0.051    0.056    1.681    1.749
 make_m2s                          4572 13.5    0.053    0.056    1.635    1.684
 fft_wrap_pw1pw2                   1201 12.6    0.007    0.007    1.606    1.676
 make_images                       4572 14.5    0.133    0.139    1.552    1.600
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.496    1.517
 transfer_rs2pw                     487 10.6    0.005    0.006    1.356    1.474
 mp_alltoall_d11v                  2130 13.8    1.311    1.454    1.311    1.454
 acc_transpose_blocks_sync       164592 16.5    1.201    1.440    1.201    1.440
 grid_collocate_task_list           119  9.7    1.352    1.430    1.352    1.430
 mp_waitany                       12084 13.8    1.237    1.391    1.237    1.391
 fft3d_ps                          1201 14.6    0.371    0.478    1.320    1.381
 transfer_pw2rs                     487 13.2    0.006    0.006    1.323    1.334
 fft_wrap_pw1pw2_140                487 13.2    0.107    0.113    1.246    1.319
 dbcsr_dot_sd                      1205 11.9    0.049    0.059    0.740    1.138
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="100", plot="h2o_64_md", label="(8n/12r/1t)", y=53.950000, yerr=0.000000
PlotPoint: name="101", plot="h2o_64_md_mem", label="(8n/12r/1t)", y=432.181818, yerr=1.113404
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/04/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     57.173320E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3066240       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      47.9
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             488.792064E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2194560
 MPI messages size (bytes):
  total size                       310.646604E+09
  min size                           0.000000E+00
  max size                           1.145520E+06
  average size                     141.553031E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              724648                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              281952               4619501568
     32768 < size <=   131072              494448              39143342080
    131072 < size <=  4194304              440000             264807943488
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3672                  62658.
 MP_Allreduce        10306                    303.
 MP_Sync                54
 MP_Alltoall          2060                 247987.
 MP_SendRecv         16779                  37093.
 MP_ISendRecv        16779                  37093.
 MP_Wait             23539
 MP_comm_split          50
 MP_ISend             5720                 128509.
 MP_IRecv             5720                 128509.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.029   38.707   38.709
 qs_mol_dyn_low                       1  2.0    0.003    0.004   38.452   38.459
 qs_forces                           11  3.9    0.002    0.003   38.392   38.392
 qs_energies                         11  4.9    0.002    0.007   36.724   36.728
 scf_env_do_scf                      11  5.9    0.001    0.002   31.670   31.671
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.006   29.244   29.245
 dbcsr_multiply_generic            2286 12.5    0.101    0.105   22.029   22.377
 qs_scf_new_mos                     108  7.5    0.001    0.001   20.695   20.913
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   20.695   20.913
 ot_scf_mini                        108  9.5    0.003    0.004   19.763   19.921
 multiply_cannon                   2286 13.5    0.209    0.217   16.820   18.302
 velocity_verlet                     10  3.0    0.001    0.002   18.226   18.227
 multiply_cannon_loop              2286 14.5    1.194    1.254   15.662   17.188
 ot_mini                            108 10.5    0.001    0.001   12.121   12.347
 mp_waitall_1                    200699 16.5    5.491   10.652    5.491   10.652
 qs_ot_get_derivative               108 11.5    0.001    0.001    9.756    9.918
 multiply_cannon_metrocomm3       27432 15.5    0.072    0.075    4.005    9.418
 multiply_cannon_multrec          27432 15.5    1.826    4.118    6.148    9.035
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.723    6.864
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.018    6.723    6.863
 dbcsr_mm_accdrv_process          47894 16.0    3.537    5.815    4.242    6.328
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.913    6.038
 qs_ot_get_p                        119 10.4    0.001    0.003    4.727    4.940
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.638    4.550
 mp_sum_l                          7287 12.8    2.125    4.122    2.125    4.122
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    3.068    4.049
 apply_single                       119 13.6    0.000    0.000    3.068    4.048
 init_scf_run                        11  5.9    0.000    0.001    3.848    3.849
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.848    3.849
 sum_up_and_integrate               119 10.3    0.001    0.002    3.654    3.659
 integrate_v_rspace                 119 11.3    0.002    0.002    3.640    3.645
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.571    3.598
 calculate_rho_elec                 119  8.7    0.021    0.024    3.570    3.597
 qs_ot_p2m_diag                      50 11.0    0.009    0.014    3.063    3.081
 make_m2s                          4572 13.5    0.051    0.053    2.638    2.964
 make_images                       4572 14.5    0.205    0.245    2.548    2.876
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    2.665    2.665
 init_scf_loop                       11  6.9    0.001    0.004    2.407    2.414
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.325    2.406
 calculate_first_density_matrix       1  7.0    0.000    0.002    2.340    2.343
 ot_diis_step                       108 11.5    0.010    0.011    2.317    2.318
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    2.314    2.315
 cp_fm_redistribute_end              50 14.0    1.171    2.285    1.174    2.287
 cp_fm_diag_elpa_base                50 14.0    1.081    2.196    1.109    2.229
 calculate_dm_sparse                119  9.5    0.000    0.001    2.079    2.161
 multiply_cannon_sync_h2d         27432 15.5    1.674    2.144    1.674    2.144
 acc_transpose_blocks             27432 15.5    0.113    0.120    1.607    1.938
 density_rs2pw                      119  9.7    0.004    0.004    1.868    1.933
 grid_integrate_task_list           119 12.3    1.838    1.923    1.838    1.923
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.889    1.891
 pw_transfer                       1439 11.6    0.065    0.069    1.828    1.866
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.737    1.777
 jit_kernel_multiply                  9 16.4    0.644    1.735    0.644    1.735
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.692    1.732
 make_images_data                  4572 15.5    0.047    0.053    1.237    1.611
 prepare_preconditioner              11  7.9    0.000    0.000    1.505    1.531
 make_preconditioner                 11  8.9    0.000    0.002    1.505    1.531
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.404    1.460
 hybrid_alltoall_any               4725 16.4    0.053    0.114    1.102    1.456
 wfi_extrapolate                     11  7.9    0.001    0.001    1.449    1.449
 fft_wrap_pw1pw2_140                487 13.2    0.127    0.131    1.392    1.433
 potential_pw2rs                    119 12.3    0.006    0.006    1.400    1.406
 fft3d_ps                          1201 14.6    0.521    0.574    1.368    1.399
 grid_collocate_task_list           119  9.7    1.290    1.371    1.290    1.371
 mp_alltoall_d11v                  2130 13.8    1.202    1.348    1.202    1.348
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.255    1.264
 qs_ot_get_orbitals                 108 10.5    0.000    0.001    1.215    1.261
 transfer_rs2pw                     487 10.6    0.005    0.005    1.053    1.148
 mp_sum_d                          4137 12.0    0.559    1.044    0.559    1.044
 mp_allgather_i34                  2286 14.5    0.582    0.969    0.582    0.969
 qs_energies_init_hamiltonians       11  5.9    0.001    0.003    0.939    0.939
 acc_transpose_blocks_sync        82296 16.5    0.816    0.938    0.816    0.938
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.871    0.885
 transfer_pw2rs                     487 13.2    0.004    0.005    0.870    0.873
 acc_transpose_blocks_kernels     27432 16.5    0.187    0.274    0.652    0.857
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="102", plot="h2o_64_md", label="(8n/6r/2t)", y=38.709000, yerr=0.000000
PlotPoint: name="103", plot="h2o_64_md_mem", label="(8n/6r/2t)", y=465.363636, yerr=1.666391
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/05/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                     59.051995E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3143552       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      46.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             520.892416E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  950976
 MPI messages size (bytes):
  total size                       203.844256E+09
  min size                           0.000000E+00
  max size                           1.638400E+06
  average size                     214.352688E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192              253512               2076770304
      8192 < size <=    32768              179424               2939682816
     32768 < size <=   131072              181440              14863564800
    131072 < size <=  4194304              330176             183964913216
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63490.
 MP_Allreduce        10155                    305.
 MP_Sync                54
 MP_Alltoall          1821                1607811.
 MP_SendRecv         11067                  57667.
 MP_ISendRecv        11067                  57667.
 MP_Wait             21987
 MP_ISend             9880                  92618.
 MP_IRecv             9880                  92618.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.014    0.032   32.529   32.531
 qs_mol_dyn_low                       1  2.0    0.003    0.004   32.218   32.225
 qs_forces                           11  3.9    0.003    0.007   32.158   32.158
 qs_energies                         11  4.9    0.005    0.031   30.610   30.612
 scf_env_do_scf                      11  5.9    0.003    0.023   25.828   25.832
 scf_env_do_scf_inner_loop          108  6.5    0.002    0.007   23.253   23.254
 dbcsr_multiply_generic            2286 12.5    0.096    0.098   16.927   17.031
 qs_scf_new_mos                     108  7.5    0.001    0.001   15.561   15.572
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   15.561   15.571
 velocity_verlet                     10  3.0    0.001    0.001   15.219   15.221
 ot_scf_mini                        108  9.5    0.002    0.003   14.797   14.812
 multiply_cannon                   2286 13.5    0.194    0.198   13.686   14.317
 multiply_cannon_loop              2286 14.5    0.869    0.911   12.924   13.664
 ot_mini                            108 10.5    0.001    0.001    9.112    9.128
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.622    7.636
 multiply_cannon_multrec          18288 15.5    1.881    2.874    6.949    7.224
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.021    6.035
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.017    6.020    6.035
 dbcsr_mm_accdrv_process          38222 16.0    4.908    5.688    4.974    5.753
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.318    5.331
 mp_waitall_1                    158411 16.6    2.684    3.835    2.684    3.835
 sum_up_and_integrate               119 10.3    0.003    0.016    3.563    3.576
 integrate_v_rspace                 119 11.3    0.003    0.003    3.548    3.555
 init_scf_run                        11  5.9    0.000    0.001    3.538    3.538
 scf_env_initial_rho_setup           11  6.9    0.001    0.002    3.538    3.538
 qs_ot_get_p                        119 10.4    0.001    0.001    3.443    3.468
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.905    3.446
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.346    3.356
 calculate_rho_elec                 119  8.7    0.031    0.031    3.346    3.355
 init_scf_loop                       11  6.9    0.001    0.005    2.556    2.562
 multiply_cannon_metrocomm3       18288 15.5    0.048    0.050    1.572    2.543
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.952    2.360
 apply_single                       119 13.6    0.000    0.000    1.951    2.359
 qs_ot_p2m_diag                      50 11.0    0.012    0.013    2.289    2.295
 calculate_first_density_matrix       1  7.0    0.007    0.055    2.247    2.291
 cp_dbcsr_syevd                      50 12.0    0.003    0.005    1.986    1.986
 density_rs2pw                      119  9.7    0.004    0.004    1.873    1.949
 make_m2s                          4572 13.5    0.044    0.045    1.819    1.946
 grid_integrate_task_list           119 12.3    1.802    1.906    1.802    1.906
 pw_transfer                       1439 11.6    0.065    0.068    1.875    1.887
 make_images                       4572 14.5    0.191    0.204    1.732    1.858
 calculate_dm_sparse                119  9.5    0.000    0.001    1.845    1.855
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.783    1.795
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.739    1.752
 acc_transpose_blocks             18288 15.5    0.079    0.081    1.637    1.743
 cp_fm_diag_elpa                     50 13.0    0.000    0.001    1.728    1.740
 cp_fm_diag_elpa_base                50 14.0    1.702    1.718    1.724    1.737
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.706    1.707
 prepare_preconditioner              11  7.9    0.000    0.000    1.693    1.695
 make_preconditioner                 11  8.9    0.000    0.001    1.693    1.695
 make_full_inverse_cholesky          11  9.9    0.000    0.001    1.551    1.635
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.511    1.516
 ot_diis_step                       108 11.5    0.011    0.012    1.471    1.471
 mp_sum_l                          7287 12.8    1.040    1.463    1.040    1.463
 fft_wrap_pw1pw2_140                487 13.2    0.178    0.182    1.427    1.438
 potential_pw2rs                    119 12.3    0.007    0.008    1.364    1.370
 fft3d_ps                          1201 14.6    0.547    0.566    1.338    1.349
 grid_collocate_task_list           119  9.7    1.236    1.300    1.236    1.300
 multiply_cannon_sync_h2d         18288 15.5    1.025    1.208    1.025    1.208
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.177    1.183
 wfi_extrapolate                     11  7.9    0.001    0.001    1.172    1.172
 transfer_rs2pw                     487 10.6    0.005    0.005    1.047    1.153
 qs_energies_init_hamiltonians       11  5.9    0.002    0.005    1.008    1.009
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.957    0.974
 make_images_data                  4572 15.5    0.047    0.051    0.775    0.919
 acc_transpose_blocks_sync        54864 16.5    0.747    0.846    0.747    0.846
 hybrid_alltoall_any               4725 16.4    0.058    0.116    0.665    0.828
 transfer_pw2rs                     487 13.2    0.004    0.004    0.822    0.827
 mp_alltoall_d11v                  2130 13.8    0.711    0.825    0.711    0.825
 acc_transpose_blocks_kernels     18288 16.5    0.217    0.222    0.792    0.799
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.777    0.779
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.671    0.734
 cp_fm_cholesky_invert               11 10.9    0.715    0.719    0.715    0.719
 mp_alltoall_z22v                  1201 16.6    0.639    0.709    0.639    0.709
 mp_waitany                        9880 13.7    0.575    0.695    0.575    0.695
 transfer_rs2pw_140                 130 11.5    0.115    0.118    0.564    0.660
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="104", plot="h2o_64_md", label="(8n/4r/3t)", y=32.531000, yerr=0.000000
PlotPoint: name="105", plot="h2o_64_md_mem", label="(8n/4r/3t)", y=496.181818, yerr=0.833196
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/06/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    114.044384E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               3805952       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0      38.6
 marketing flops                     2.107592E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             557.334528E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1042416
 MPI messages size (bytes):
  total size                       150.443262E+09
  min size                           0.000000E+00
  max size                           1.188816E+06
  average size                     144.321719E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              228256                        0
       128 < size <=     8192              126888               1039466496
      8192 < size <=    32768              191472               3137077248
     32768 < size <=   131072              295800              25899827200
    131072 < size <=  4194304              200000             120367247040
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63489.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                2412273.
 MP_SendRecv          8211                  74133.
 MP_ISendRecv         8211                  74133.
 MP_Wait             16271
 MP_ISend             7280                 135929.
 MP_IRecv             7280                 135929.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.009    0.028   36.159   36.161
 qs_mol_dyn_low                       1  2.0    0.003    0.004   35.990   35.999
 qs_forces                           11  3.9    0.002    0.002   35.928   35.928
 qs_energies                         11  4.9    0.001    0.001   34.227   34.233
 scf_env_do_scf                      11  5.9    0.000    0.001   29.296   29.298
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   25.944   25.944
 dbcsr_multiply_generic            2286 12.5    0.102    0.104   19.828   19.948
 velocity_verlet                     10  3.0    0.001    0.002   18.407   18.409
 qs_scf_new_mos                     108  7.5    0.001    0.001   17.875   17.932
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   17.874   17.931
 ot_scf_mini                        108  9.5    0.002    0.003   16.842   16.891
 multiply_cannon                   2286 13.5    0.218    0.225   16.118   16.413
 multiply_cannon_loop              2286 14.5    1.537    1.611   15.161   15.576
 ot_mini                            108 10.5    0.001    0.001   10.343   10.401
 multiply_cannon_multrec          27432 15.5    2.450    3.108    9.013    9.282
 qs_ot_get_derivative               108 11.5    0.001    0.001    8.476    8.526
 dbcsr_mm_accdrv_process          47916 15.9    6.053    7.609    6.461    7.809
 rebuild_ks_matrix                  119  8.3    0.000    0.000    6.304    6.355
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.012    6.304    6.354
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.586    5.631
 qs_ot_get_p                        119 10.4    0.001    0.001    3.639    3.711
 init_scf_run                        11  5.9    0.000    0.001    3.607    3.608
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.607    3.607
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    3.148    3.553
 sum_up_and_integrate               119 10.3    0.001    0.001    3.475    3.483
 integrate_v_rspace                 119 11.3    0.002    0.003    3.464    3.471
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.361    3.404
 calculate_rho_elec                 119  8.7    0.040    0.046    3.360    3.403
 init_scf_loop                       11  6.9    0.000    0.000    3.334    3.334
 acc_transpose_blocks             27432 15.5    0.119    0.122    2.420    2.565
 prepare_preconditioner              11  7.9    0.000    0.000    2.507    2.516
 make_preconditioner                 11  8.9    0.000    0.000    2.507    2.516
 make_full_inverse_cholesky          11  9.9    0.000    0.000    2.117    2.442
 make_m2s                          4572 13.5    0.054    0.056    2.259    2.386
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.347    2.374
 make_images                       4572 14.5    0.272    0.336    2.150    2.275
 mp_waitall_1                    137007 16.6    1.720    2.262    1.720    2.262
 qs_ot_p2m_diag                      50 11.0    0.015    0.023    2.225    2.239
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.102    2.227
 apply_single                       119 13.6    0.000    0.000    2.102    2.227
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.223    2.227
 calculate_dm_sparse                119  9.5    0.000    0.000    2.140    2.189
 pw_transfer                       1439 11.6    0.065    0.068    1.991    2.025
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    1.899    1.936
 density_rs2pw                      119  9.7    0.003    0.004    1.816    1.921
 grid_integrate_task_list           119 12.3    1.828    1.898    1.828    1.898
 ot_diis_step                       108 11.5    0.012    0.012    1.826    1.826
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.819    1.820
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.803    1.804
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.637    1.647
 fft_wrap_pw1pw2_140                487 13.2    0.227    0.233    1.569    1.607
 acc_transpose_blocks_sync        82296 16.5    1.449    1.589    1.449    1.589
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.580    1.588
 cp_fm_diag_elpa_base                50 14.0    1.545    1.561    1.578    1.586
 fft3d_ps                          1201 14.6    0.592    0.653    1.387    1.412
 grid_collocate_task_list           119  9.7    1.248    1.341    1.248    1.341
 wfi_extrapolate                     11  7.9    0.001    0.001    1.330    1.330
 multiply_cannon_metrocomm3       27432 15.5    0.040    0.041    0.757    1.316
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.297    1.312
 mp_sum_l                          7287 12.8    1.020    1.293    1.020    1.293
 potential_pw2rs                    119 12.3    0.008    0.009    1.283    1.285
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.235    1.246
 cp_fm_upper_to_full                 72 14.2    0.819    1.164    0.819    1.164
 jit_kernel_multiply                  6 16.6    0.340    1.072    0.340    1.072
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.068    1.069
 dbcsr_complete_redistribute        329 12.2    0.118    0.145    0.770    1.052
 transfer_rs2pw                     487 10.6    0.004    0.005    0.884    0.991
 make_images_data                  4572 15.5    0.048    0.053    0.830    0.959
 hybrid_alltoall_any               4725 16.4    0.066    0.156    0.709    0.905
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.798    0.876
 mp_alltoall_d11v                  2130 13.8    0.724    0.833    0.724    0.833
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    0.561    0.831
 acc_transpose_blocks_kernels     27432 16.5    0.269    0.276    0.823    0.830
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.806    0.811
 mp_alltoall_i22                    627 13.8    0.436    0.734    0.436    0.734
 cp_fm_cholesky_invert               11 10.9    0.727    0.730    0.727    0.730
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="106", plot="h2o_64_md", label="(8n/3r/4t)", y=36.161000, yerr=0.000000
PlotPoint: name="107", plot="h2o_64_md_mem", label="(8n/3r/4t)", y=529.181818, yerr=3.270201
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/07/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    117.977176E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1384136       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     106.2
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             617.091072E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  219456
 MPI messages size (bytes):
  total size                        97.042514E+09
  min size                           0.000000E+00
  max size                           3.276800E+06
  average size                     442.195750E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              101892               3336634368
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304              116112              93705670464
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8156                     20.
 MP_Alltoall          8655                  64935.
 MP_ISend            36532                 168375.
 MP_IRecv            36532                 168349.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63488.
 MP_Allreduce        10154                    346.
 MP_Sync                54
 MP_Alltoall          1582                3682667.
 MP_SendRecv          5355                  94533.
 MP_ISendRecv         5355                  94533.
 MP_Wait             11335
 MP_ISend             5200                 225425.
 MP_IRecv             5200                 225425.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.010    0.034   28.630   28.631
 qs_mol_dyn_low                       1  2.0    0.003    0.004   28.387   28.409
 qs_forces                           11  3.9    0.002    0.003   28.312   28.313
 qs_energies                         11  4.9    0.001    0.001   26.612   26.615
 scf_env_do_scf                      11  5.9    0.000    0.001   21.957   21.957
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   19.510   19.510
 velocity_verlet                     10  3.0    0.010    0.012   14.653   14.657
 dbcsr_multiply_generic            2286 12.5    0.094    0.097   13.122   13.253
 qs_scf_new_mos                     108  7.5    0.001    0.001   11.866   11.893
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   11.866   11.892
 ot_scf_mini                        108  9.5    0.002    0.002   11.162   11.189
 multiply_cannon                   2286 13.5    0.223    0.228   10.539   11.124
 multiply_cannon_loop              2286 14.5    0.642    0.660    9.618    9.856
 ot_mini                            108 10.5    0.001    0.001    6.528    6.562
 multiply_cannon_multrec           9144 15.5    1.677    1.924    5.943    6.235
 rebuild_ks_matrix                  119  8.3    0.000    0.000    5.778    5.798
 qs_ks_build_kohn_sham_matrix       119  9.3    0.012    0.013    5.778    5.798
 qs_ot_get_derivative               108 11.5    0.001    0.001    5.225    5.252
 qs_ks_update_qs_env                119  7.6    0.001    0.001    5.140    5.159
 dbcsr_mm_accdrv_process          12550 15.8    3.625    4.191    4.157    4.232
 sum_up_and_integrate               119 10.3    0.001    0.001    3.422    3.426
 integrate_v_rspace                 119 11.3    0.003    0.003    3.412    3.416
 qs_rho_update_rho_low              119  7.7    0.001    0.001    3.339    3.345
 calculate_rho_elec                 119  8.7    0.060    0.061    3.339    3.344
 init_scf_run                        11  5.9    0.000    0.001    3.219    3.219
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.219    3.219
 qs_ot_get_p                        119 10.4    0.001    0.001    2.747    2.777
 init_scf_loop                       11  6.9    0.000    0.000    2.427    2.428
 pw_transfer                       1439 11.6    0.065    0.068    2.108    2.116
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.063    2.064
 mp_waitall_1                    115863 16.7    1.501    2.058    1.501    2.058
 fft_wrap_pw1pw2                   1201 12.6    0.008    0.008    2.014    2.025
 make_m2s                          4572 13.5    0.033    0.034    1.793    1.951
 grid_integrate_task_list           119 12.3    1.873    1.936    1.873    1.936
 make_images                       4572 14.5    0.268    0.298    1.703    1.860
 density_rs2pw                      119  9.7    0.003    0.003    1.756    1.816
 qs_ot_p2m_diag                      50 11.0    0.022    0.023    1.779    1.781
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    1.724    1.749
 calculate_dm_sparse                119  9.5    0.000    0.000    1.731    1.748
 prepare_preconditioner              11  7.9    0.000    0.000    1.694    1.699
 make_preconditioner                 11  8.9    0.000    0.000    1.694    1.699
 fft_wrap_pw1pw2_140                487 13.2    0.324    0.332    1.669    1.681
 make_full_inverse_cholesky          11  9.9    0.000    0.000    1.585    1.610
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.539    1.539
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.537    1.538
 acc_transpose_blocks              9144 15.5    0.043    0.044    1.425    1.475
 fft3d_ps                          1201 14.6    0.649    0.662    1.368    1.377
 grid_collocate_task_list           119  9.7    1.294    1.374    1.294    1.374
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.362    1.373
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    1.337    1.351
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.308    1.316
 cp_fm_diag_elpa_base                50 14.0    1.281    1.297    1.306    1.314
 ot_diis_step                       108 11.5    0.013    0.013    1.289    1.289
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    1.238    1.271
 apply_single                       119 13.6    0.000    0.000    1.238    1.271
 potential_pw2rs                    119 12.3    0.010    0.011    1.235    1.238
 qs_energies_init_hamiltonians       11  5.9    0.002    0.002    1.226    1.227
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.123    1.127
 wfi_extrapolate                     11  7.9    0.001    0.001    1.105    1.105
 jit_kernel_multiply                  6 15.5    0.492    1.027    0.492    1.027
 make_images_data                  4572 15.5    0.042    0.046    0.784    0.948
 hybrid_alltoall_any               4725 16.4    0.065    0.177    0.720    0.940
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.000    0.873    0.923
 multiply_cannon_metrocomm3        9144 15.5    0.020    0.020    0.458    0.872
 cp_fm_cholesky_invert               11 10.9    0.849    0.852    0.849    0.852
 transfer_rs2pw                     487 10.6    0.004    0.004    0.768    0.838
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    0.814    0.820
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.757    0.759
 mp_alltoall_d11v                  2130 13.8    0.693    0.751    0.693    0.751
 acc_transpose_blocks_sync        27432 16.5    0.716    0.740    0.716    0.740
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    0.675    0.728
 acc_transpose_blocks_kernels      9144 16.5    0.117    0.120    0.651    0.675
 mp_allgather_i34                  2286 14.5    0.237    0.673    0.237    0.673
 transfer_pw2rs                     487 13.2    0.003    0.004    0.634    0.636
 mp_alltoall_z22v                  1201 16.6    0.593    0.629    0.593    0.629
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="108", plot="h2o_64_md", label="(8n/2r/6t)", y=28.631000, yerr=0.000000
PlotPoint: name="109", plot="h2o_64_md_mem", label="(8n/2r/6t)", y=586.636364, yerr=4.051324
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/08/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32          26877100032       0.0%      0.0%    100.0%
 flops     9 x     9 x    32          44168260608       0.0%      0.0%    100.0%
 flops    22 x     9 x    32          53835724800       0.0%      0.0%    100.0%
 flops     9 x    22 x    32          53885500416       0.0%      0.0%    100.0%
 flops    32 x    32 x     9          63568871424       0.0%      0.0%    100.0%
 flops    22 x    22 x    32          67007283200       0.0%      0.0%    100.0%
 flops    32 x    32 x    22          77695287296       0.0%      0.0%    100.0%
 flops     9 x    32 x    32          78422999040       0.0%      0.0%    100.0%
 flops    22 x    32 x    32          95850332160       0.0%      0.0%    100.0%
 flops     9 x    32 x     9         266263676928       0.0%      0.0%    100.0%
 flops    22 x    32 x     9         326697440256       0.0%      0.0%    100.0%
 flops     9 x    32 x    22         326697440256       0.0%      0.0%    100.0%
 flops    22 x    32 x    22         399918497792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                         1.880888E+12       0.0%      0.0%    100.0%
 flops max/rank                    235.585836E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          146984760       0.0%      0.0%    100.0%
 number of processed stacks               1388964       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     105.8
 marketing flops                     2.107587E+12
 -------------------------------------------------------------------------------
 # multiplications                           2286
 max memory usage/rank             804.106240E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   91440
 MPI messages size (bytes):
  total size                        85.748679E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     937.758938E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               21148                692256768
     32768 < size <=   131072               19224               1259864064
    131072 < size <=  4194304               41040              21941452800
   4194304 < size <= 16777216                9456              61855174464
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3622                  63723.
 MP_Allreduce        10154                    429.
 MP_Sync                54
 MP_Alltoall          1582                7383731.
 MP_SendRecv          2499                 189067.
 MP_ISendRecv         2499                 189067.
 MP_Wait              6399
 MP_ISend             3120                 546875.
 MP_IRecv             3120                 546875.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.030   42.682   42.683
 qs_mol_dyn_low                       1  2.0    0.003    0.004   42.478   42.487
 qs_forces                           11  3.9    0.002    0.002   42.416   42.418
 qs_energies                         11  4.9    0.001    0.001   40.403   40.408
 scf_env_do_scf                      11  5.9    0.001    0.001   34.653   34.653
 scf_env_do_scf_inner_loop          108  6.5    0.003    0.007   26.842   26.843
 velocity_verlet                     10  3.0    0.002    0.002   24.133   24.139
 dbcsr_multiply_generic            2286 12.5    0.102    0.104   18.893   19.065
 qs_scf_new_mos                     108  7.5    0.001    0.001   17.116   17.214
 qs_scf_loop_do_ot                  108  8.5    0.001    0.001   17.115   17.213
 ot_scf_mini                        108  9.5    0.002    0.002   15.954   16.054
 multiply_cannon                   2286 13.5    0.299    0.306   15.000   15.873
 multiply_cannon_loop              2286 14.5    0.845    0.866   13.729   14.642
 ot_mini                            108 10.5    0.001    0.001    9.706    9.820
 multiply_cannon_multrec           9144 15.5    3.400    4.684    8.776    8.892
 init_scf_loop                       11  6.9    0.000    0.000    7.785    7.789
 qs_ot_get_derivative               108 11.5    0.001    0.001    7.640    7.739
 rebuild_ks_matrix                  119  8.3    0.000    0.000    7.288    7.430
 qs_ks_build_kohn_sham_matrix       119  9.3    0.013    0.013    7.288    7.429
 prepare_preconditioner              11  7.9    0.000    0.000    6.796    6.809
 make_preconditioner                 11  8.9    0.000    0.000    6.796    6.809
 qs_ks_update_qs_env                119  7.6    0.001    0.001    6.567    6.694
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.374    6.689
 dbcsr_mm_accdrv_process          12550 15.8    4.515    6.470    5.241    6.564
 cp_fm_upper_to_full                 72 14.2    3.160    4.526    3.160    4.526
 qs_rho_update_rho_low              119  7.7    0.001    0.001    4.228    4.233
 calculate_rho_elec                 119  8.7    0.117    0.120    4.227    4.233
 sum_up_and_integrate               119 10.3    0.001    0.001    3.858    3.864
 integrate_v_rspace                 119 11.3    0.003    0.003    3.847    3.854
 init_scf_run                        11  5.9    0.000    0.001    3.675    3.675
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    3.675    3.675
 qs_ot_get_p                        119 10.4    0.001    0.001    3.333    3.465
 mp_waitall_1                     94719 16.7    2.299    3.284    2.299    3.284
 qs_ot_get_derivative_taylor         59 13.0    0.001    0.001    2.821    3.215
 pw_transfer                       1439 11.6    0.068    0.069    3.035    3.043
 fft_wrap_pw1pw2                   1201 12.6    0.009    0.009    2.937    2.945
 dbcsr_complete_redistribute        329 12.2    0.282    0.286    1.957    2.761
 make_m2s                          4572 13.5    0.037    0.038    2.369    2.540
 apply_preconditioner_dbcsr         119 12.6    0.000    0.000    2.214    2.528
 apply_single                       119 13.6    0.000    0.000    2.214    2.528
 fft_wrap_pw1pw2_140                487 13.2    0.649    0.653    2.505    2.514
 copy_fm_to_dbcsr                   176 11.2    0.001    0.001    1.653    2.461
 make_images                       4572 14.5    0.352    0.384    2.248    2.420
 density_rs2pw                      119  9.7    0.003    0.003    2.250    2.271
 calculate_dm_sparse                119  9.5    0.000    0.000    2.234    2.248
 mp_alltoall_i22                    627 13.8    1.450    2.240    1.450    2.240
 multiply_cannon_metrocomm3        9144 15.5    0.021    0.021    1.346    2.225
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.417    2.218
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.204    2.206
 grid_integrate_task_list           119 12.3    2.085    2.133    2.085    2.133
 qs_ot_get_derivative_diag           49 12.0    0.001    0.001    2.038    2.089
 ot_diis_step                       108 11.5    0.014    0.014    2.041    2.041
 qs_ot_p2m_diag                      50 11.0    0.043    0.044    1.959    1.961
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    1.935    1.936
 fft3d_ps                          1201 14.6    0.868    0.888    1.858    1.864
 acc_transpose_blocks              9144 15.5    0.044    0.045    1.787    1.811
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.776    1.778
 mp_sum_l                          7287 12.8    0.977    1.694    0.977    1.694
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    1.630    1.651
 cp_dbcsr_syevd                      50 12.0    0.003    0.003    1.606    1.607
 grid_collocate_task_list           119  9.7    1.525    1.554    1.525    1.554
 cp_fm_cholesky_invert               11 10.9    1.443    1.446    1.443    1.446
 wfi_extrapolate                     11  7.9    0.001    0.001    1.408    1.408
 potential_pw2rs                    119 12.3    0.014    0.014    1.357    1.359
 hybrid_alltoall_any               4725 16.4    0.091    0.150    1.086    1.347
 cp_fm_diag_elpa                     50 13.0    0.000    0.000    1.320    1.320
 cp_fm_diag_elpa_base                50 14.0    1.176    1.225    1.318    1.318
 qs_ot_get_orbitals                 108 10.5    0.000    0.000    1.300    1.317
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.000    0.000    1.240    1.245
 make_images_data                  4572 15.5    0.046    0.049    1.027    1.244
 mp_alltoall_d11v                  2130 13.8    1.182    1.218    1.182    1.218
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    1.108    1.126
 acc_transpose_blocks_sync        27432 16.5    1.075    1.101    1.075    1.101
 build_core_hamiltonian_matrix_      11  4.9    0.000    0.001    0.993    1.026
 jit_kernel_multiply                  6 15.5    0.696    1.014    0.696    1.014
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    0.931    0.944
 qs_create_task_list                 11  7.9    0.001    0.001    0.927    0.938
 generate_qs_task_list               11  8.9    0.368    0.387    0.926    0.938
 mp_alltoall_z22v                  1201 16.6    0.854    0.883    0.854    0.883
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="110", plot="h2o_64_md", label="(8n/1r/12t)", y=42.683000, yerr=0.000000
PlotPoint: name="111", plot="h2o_64_md_mem", label="(8n/1r/12t)", y=743.454545, yerr=20.951735
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/09/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    198.287135E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               8410880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     117.0
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             500.514816E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 8483040
 MPI messages size (bytes):
  total size                         1.160510E+12
  min size                           0.000000E+00
  max size                           1.161504E+06
  average size                     136.803609E+03
 MPI breakdown and total messages size (bytes):
             size <=      128             1836752                        0
       128 < size <=     8192             1040592               8524529664
      8192 < size <=    32768             1486976              24362614784
     32768 < size <=   131072             2491776             216971345920
    131072 < size <=  4194304             1626944             910632720448
   4194304 < size <= 16777216                   0                        0
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66212.
 MP_Allreduce         9776                    488.
 MP_Sync                52
 MP_Alltoall          1938                1384653.
 MP_SendRecv         20900                   9096.
 MP_ISendRecv        20900                   9096.
 MP_Wait             37268
 MP_ISend            14300                  82312.
 MP_IRecv            14300                  82312.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.009    0.030   83.550   83.552
 qs_mol_dyn_low                       1  2.0    0.003    0.004   83.282   83.312
 qs_forces                           11  3.9    0.003    0.003   83.214   83.215
 qs_energies                         11  4.9    0.001    0.002   80.305   80.325
 scf_env_do_scf                      11  5.9    0.000    0.001   71.365   71.368
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   65.666   65.667
 dbcsr_multiply_generic            2055 12.4    0.105    0.108   51.342   51.537
 qs_scf_new_mos                      99  7.5    0.000    0.001   48.153   48.292
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   48.152   48.292
 ot_scf_mini                         99  9.5    0.002    0.002   45.765   45.860
 velocity_verlet                     10  3.0    0.001    0.002   43.697   43.698
 multiply_cannon                   2055 13.4    0.184    0.190   42.562   43.401
 multiply_cannon_loop              2055 14.4    1.789    1.834   41.471   42.340
 ot_mini                             99 10.5    0.001    0.001   26.913   27.005
 qs_ot_get_derivative                99 11.5    0.002    0.009   20.061   20.144
 multiply_cannon_multrec          49320 15.4   11.311   12.102   17.399   18.178
 rebuild_ks_matrix                  110  8.3    0.000    0.000   14.782   14.917
 qs_ks_build_kohn_sham_matrix       110  9.3    0.011    0.012   14.782   14.917
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.966   13.086
 mp_waitall_1                    220248 16.4   11.020   11.897   11.020   11.897
 qs_ot_get_p                        110 10.4    0.001    0.001   10.320   10.419
 multiply_cannon_sync_h2d         49320 15.4    9.536   10.096    9.536   10.096
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    7.590    8.155
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    7.265    7.706
 apply_single                       110 13.6    0.000    0.000    7.265    7.706
 multiply_cannon_metrocomm3       49320 15.4    0.081    0.084    6.438    7.528
 sum_up_and_integrate               110 10.3    0.001    0.003    7.298    7.313
 integrate_v_rspace                 110 11.3    0.002    0.003    7.273    7.294
 qs_ot_p2m_diag                      48 11.0    0.012    0.018    7.023    7.065
 init_scf_run                        11  5.9    0.000    0.001    6.862    6.863
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    6.862    6.862
 qs_rho_update_rho_low              110  7.6    0.000    0.001    6.563    6.699
 calculate_rho_elec                 110  8.6    0.020    0.024    6.563    6.699
 ot_diis_step                        99 11.5    0.006    0.006    6.630    6.630
 dbcsr_mm_accdrv_process          87628 16.1    3.091    3.189    5.957    6.239
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    6.101    6.102
 init_scf_loop                       11  6.9    0.000    0.000    5.665    5.666
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    5.518    5.575
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    5.473    5.498
 cp_fm_diag_elpa_base                48 14.0    5.457    5.485    5.471    5.496
 mp_sum_l                          6594 12.7    4.185    4.871    4.185    4.871
 make_m2s                          4110 13.4    0.061    0.065    4.136    4.254
 make_images                       4110 14.4    0.177    0.190    4.040    4.159
 wfi_extrapolate                     11  7.9    0.001    0.001    4.101    4.101
 calculate_dm_sparse                110  9.5    0.001    0.001    3.756    3.860
 density_rs2pw                      110  9.6    0.004    0.004    3.601    3.824
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.590    3.594
 multiply_cannon_metrocomm1       49320 15.4    0.063    0.066    2.439    3.586
 prepare_preconditioner              11  7.9    0.000    0.000    3.491    3.510
 make_preconditioner                 11  8.9    0.000    0.000    3.491    3.510
 grid_integrate_task_list           110 12.3    3.267    3.419    3.267    3.419
 make_full_inverse_cholesky          11  9.9    0.000    0.000    3.274    3.322
 pw_transfer                       1331 11.6    0.055    0.067    3.210    3.309
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    3.241    3.295
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    3.203    3.236
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.121    3.224
 potential_pw2rs                    110 12.3    0.005    0.006    2.863    2.910
 fft_wrap_pw1pw2_140                451 13.1    0.309    0.332    2.621    2.730
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.657    2.662
 acc_transpose_blocks             49320 15.4    0.211    0.223    2.563    2.636
 jit_kernel_multiply                 13 15.9    2.580    2.615    2.580    2.615
 mp_alltoall_d11v                  2046 13.8    2.140    2.550    2.140    2.550
 fft3d_ps                          1111 14.6    0.800    0.890    2.416    2.504
 transfer_rs2pw                     451 10.6    0.005    0.006    2.082    2.405
 grid_collocate_task_list           110  9.6    2.157    2.283    2.157    2.283
 mp_waitany                       14300 13.8    1.839    2.269    1.839    2.269
 transfer_pw2rs                     451 13.1    0.006    0.007    1.982    2.017
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.946    1.979
 make_images_data                  4110 15.4    0.043    0.047    1.833    1.976
 cp_fm_cholesky_invert               11 10.9    1.899    1.904    1.899    1.904
 hybrid_alltoall_any               4261 16.3    0.083    0.480    1.587    1.836
 mp_sum_d                          3891 11.9    1.384    1.823    1.384    1.823
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.655    1.693
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="200", plot="h2o_128_md", label="(8n/12r/1t)", y=83.552000, yerr=0.000000
PlotPoint: name="201", plot="h2o_128_md_mem", label="(8n/12r/1t)", y=475.909091, yerr=2.108621
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/10/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    390.715586E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               5019072       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     196.1
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             594.669568E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1972800
 MPI messages size (bytes):
  total size                         1.077520E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     546.188250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192              222984               1826684928
      8192 < size <=    32768              520356              13399818240
     32768 < size <=   131072              372336              35386294272
    131072 < size <=  4194304              787758             788321309808
   4194304 < size <= 16777216               54450             238588003280
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66430.
 MP_Allreduce         9775                    566.
 MP_Sync                52
 MP_Alltoall          1717                3558822.
 MP_SendRecv         10340                  26400.
 MP_ISendRecv        10340                  26400.
 MP_Wait             22352
 MP_ISend            10164                 155761.
 MP_IRecv            10164                 155761.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.009    0.029   68.906   68.909
 qs_mol_dyn_low                       1  2.0    0.003    0.004   68.682   68.692
 qs_forces                           11  3.9    0.003    0.003   68.614   68.615
 qs_energies                         11  4.9    0.001    0.002   65.304   65.307
 scf_env_do_scf                      11  5.9    0.000    0.001   56.813   56.816
 scf_env_do_scf_inner_loop           99  6.5    0.002    0.007   49.225   49.226
 dbcsr_multiply_generic            2055 12.4    0.114    0.118   37.920   38.103
 velocity_verlet                     10  3.0    0.001    0.002   36.050   36.052
 qs_scf_new_mos                      99  7.5    0.001    0.001   33.357   33.481
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   33.356   33.480
 multiply_cannon                   2055 13.4    0.225    0.243   31.224   32.250
 ot_scf_mini                         99  9.5    0.003    0.003   31.682   31.831
 multiply_cannon_loop              2055 14.4    1.166    1.193   29.936   30.741
 ot_mini                             99 10.5    0.001    0.001   18.409   18.560
 multiply_cannon_multrec          24660 15.4    6.970    8.755   14.029   15.745
 rebuild_ks_matrix                  110  8.3    0.000    0.001   13.535   13.633
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.017   13.535   13.632
 qs_ot_get_derivative                99 11.5    0.001    0.001   12.639   12.785
 qs_ks_update_qs_env                110  7.6    0.001    0.001   11.914   12.011
 mp_waitall_1                    176588 16.5    7.456    9.898    7.456    9.898
 multiply_cannon_metrocomm3       24660 15.4    0.074    0.076    5.060    8.012
 init_scf_loop                       11  6.9    0.000    0.000    7.553    7.553
 multiply_cannon_sync_h2d         24660 15.4    6.350    7.502    6.350    7.502
 dbcsr_mm_accdrv_process          52282 16.1    5.434    6.287    6.888    7.203
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    6.445    7.055
 apply_single                       110 13.6    0.000    0.001    6.445    7.055
 qs_ot_get_p                        110 10.4    0.001    0.001    6.629    6.823
 sum_up_and_integrate               110 10.3    0.002    0.003    6.387    6.400
 integrate_v_rspace                 110 11.3    0.002    0.003    6.360    6.372
 init_scf_run                        11  5.9    0.000    0.001    6.126    6.127
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    6.126    6.126
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.906    5.915
 calculate_rho_elec                 110  8.6    0.039    0.047    5.905    5.915
 ot_diis_step                        99 11.5    0.010    0.010    5.722    5.723
 prepare_preconditioner              11  7.9    0.000    0.000    5.537    5.561
 make_preconditioner                 11  8.9    0.000    0.000    5.537    5.561
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    4.744    5.484
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.145    5.294
 make_m2s                          4110 13.4    0.057    0.059    4.284    4.745
 qs_ot_p2m_diag                      48 11.0    0.029    0.044    4.680    4.700
 make_images                       4110 14.4    0.404    0.462    4.171    4.630
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    4.212    4.212
 pw_transfer                       1331 11.6    0.066    0.074    3.582    3.729
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    3.633    3.647
 cp_fm_diag_elpa_base                48 14.0    3.584    3.600    3.630    3.644
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.009    3.475    3.623
 wfi_extrapolate                     11  7.9    0.001    0.001    3.523    3.523
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.328    3.404
 density_rs2pw                      110  9.6    0.004    0.004    3.148    3.327
 grid_integrate_task_list           110 12.3    3.163    3.320    3.163    3.320
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.002    3.230    3.231
 fft_wrap_pw1pw2_140                451 13.1    0.358    0.379    2.959    3.109
 calculate_dm_sparse                110  9.5    0.001    0.001    2.978    3.006
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.887    2.924
 hybrid_alltoall_any               4261 16.3    0.105    0.451    2.012    2.797
 make_images_data                  4110 15.4    0.048    0.052    2.301    2.787
 fft3d_ps                          1111 14.6    1.116    1.345    2.568    2.730
 cp_fm_cholesky_invert               11 10.9    2.600    2.607    2.600    2.607
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.500    2.503
 mp_sum_l                          6594 12.7    1.780    2.500    1.780    2.500
 grid_collocate_task_list           110  9.6    2.170    2.331    2.170    2.331
 potential_pw2rs                    110 12.3    0.008    0.009    2.256    2.266
 mp_alltoall_d11v                  2046 13.8    1.755    2.000    1.755    2.000
 acc_transpose_blocks             24660 15.4    0.115    0.119    1.966    1.995
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.972    1.991
 jit_kernel_multiply                 10 16.2    1.085    1.897    1.085    1.897
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    1.848    1.849
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.777    1.790
 multiply_cannon_metrocomm4       22605 15.4    0.074    0.078    0.783    1.690
 mp_allgather_i34                  2055 14.4    0.598    1.628    0.598    1.628
 transfer_rs2pw                     451 10.6    0.006    0.007    1.425    1.623
 mp_irecv_dv                      57340 16.2    0.659    1.572    0.659    1.572
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.540    1.548
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.002    1.384    1.487
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="202", plot="h2o_128_md", label="(8n/6r/2t)", y=68.909000, yerr=0.000000
PlotPoint: name="203", plot="h2o_128_md_mem", label="(8n/6r/2t)", y=561.727273, yerr=7.312392
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/11/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    404.681598E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               3346752       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     294.1
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             666.173440E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  854880
 MPI messages size (bytes):
  total size                       708.322787E+09
  min size                           0.000000E+00
  max size                           6.553600E+06
  average size                     828.564000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768              222984               7302414336
     32768 < size <=   131072              153888              10085203968
    131072 < size <=  4194304              389376             200257044480
   4194304 < size <= 16777216               82208             490679162176
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66421.
 MP_Allreduce         9774                    562.
 MP_Sync                52
 MP_Alltoall          1496                4511006.
 MP_SendRecv          6820                  27424.
 MP_ISendRecv         6820                  27424.
 MP_Wait             25498
 MP_ISend            17072                 115022.
 MP_IRecv            17072                 115022.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.014    0.032   59.226   59.227
 qs_mol_dyn_low                       1  2.0    0.003    0.004   58.959   58.972
 qs_forces                           11  3.9    0.003    0.003   58.888   58.889
 qs_energies                         11  4.9    0.001    0.001   55.743   55.746
 scf_env_do_scf                      11  5.9    0.000    0.001   47.925   47.925
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   39.513   39.513
 velocity_verlet                     10  3.0    0.001    0.002   32.190   32.192
 dbcsr_multiply_generic            2055 12.4    0.109    0.112   28.523   28.741
 qs_scf_new_mos                      99  7.5    0.001    0.001   24.979   25.073
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   24.979   25.073
 ot_scf_mini                         99  9.5    0.002    0.003   23.733   23.842
 multiply_cannon                   2055 13.4    0.212    0.221   22.390   23.556
 multiply_cannon_loop              2055 14.4    0.822    0.849   21.232   22.225
 ot_mini                             99 10.5    0.001    0.001   13.569   13.676
 rebuild_ks_matrix                  110  8.3    0.000    0.001   12.066   12.186
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   12.065   12.186
 multiply_cannon_multrec          16440 15.4    3.701    4.744    9.925   10.905
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.616   10.724
 mp_waitall_1                    139946 16.5    6.639    9.778    6.639    9.778
 qs_ot_get_derivative                99 11.5    0.005    0.013    9.186    9.293
 init_scf_loop                       11  6.9    0.000    0.000    8.380    8.380
 multiply_cannon_metrocomm3       16440 15.4    0.045    0.047    4.146    7.133
 prepare_preconditioner              11  7.9    0.000    0.000    6.642    6.657
 make_preconditioner                 11  8.9    0.000    0.000    6.642    6.657
 make_full_inverse_cholesky          11  9.9    0.000    0.000    6.009    6.362
 sum_up_and_integrate               110 10.3    0.001    0.002    6.246    6.259
 integrate_v_rspace                 110 11.3    0.003    0.003    6.220    6.234
 dbcsr_mm_accdrv_process          34862 16.1    5.367    5.730    6.067    6.153
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.699    5.710
 calculate_rho_elec                 110  8.6    0.059    0.059    5.698    5.709
 qs_ot_get_p                        110 10.4    0.001    0.001    5.357    5.489
 init_scf_run                        11  5.9    0.000    0.001    5.441    5.441
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.440    5.440
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.902    5.312
 apply_single                       110 13.6    0.000    0.000    4.901    5.312
 make_m2s                          4110 13.4    0.050    0.051    4.106    4.485
 make_images                       4110 14.4    0.397    0.521    3.987    4.363
 ot_diis_step                        99 11.5    0.011    0.011    4.352    4.353
 multiply_cannon_sync_h2d         16440 15.4    3.277    3.829    3.277    3.829
 qs_ot_p2m_diag                      48 11.0    0.042    0.044    3.757    3.761
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.124    3.751
 pw_transfer                       1331 11.6    0.066    0.073    3.581    3.592
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.474    3.487
 grid_integrate_task_list           110 12.3    3.200    3.439    3.200    3.439
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.392    3.392
 density_rs2pw                      110  9.6    0.004    0.004    2.941    3.196
 fft_wrap_pw1pw2_140                451 13.1    0.461    0.470    3.012    3.027
 wfi_extrapolate                     11  7.9    0.001    0.001    2.942    2.942
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.868    2.878
 cp_fm_diag_elpa_base                48 14.0    2.802    2.832    2.866    2.877
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.776    2.777
 make_images_data                  4110 15.4    0.046    0.050    2.294    2.776
 hybrid_alltoall_any               4261 16.3    0.108    0.383    2.077    2.764
 cp_fm_cholesky_invert               11 10.9    2.600    2.606    2.600    2.606
 calculate_dm_sparse                110  9.5    0.001    0.001    2.533    2.565
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.462    2.499
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.414    2.476
 calculate_first_density_matrix       1  7.0    0.000    0.001    2.408    2.409
 fft3d_ps                          1111 14.6    1.119    1.131    2.391    2.401
 grid_collocate_task_list           110  9.6    2.220    2.390    2.220    2.390
 multiply_cannon_metrocomm4       14385 15.4    0.047    0.050    0.872    2.307
 mp_alltoall_d11v                  2046 13.8    1.734    2.201    1.734    2.201
 mp_irecv_dv                      48980 15.7    0.798    2.173    0.798    2.173
 potential_pw2rs                    110 12.3    0.010    0.011    2.091    2.096
 mp_sum_l                          6594 12.7    1.400    1.982    1.400    1.982
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    1.946    1.947
 dbcsr_complete_redistribute        325 12.2    0.322    0.356    1.392    1.852
 cp_fm_upper_to_full                 70 14.2    1.396    1.757    1.396    1.757
 acc_transpose_blocks             16440 15.4    0.079    0.082    1.594    1.679
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.640    1.653
 cp_fm_cholesky_decompose            22 10.9    1.540    1.560    1.540    1.560
 mp_allgather_i34                  2055 14.4    0.473    1.517    0.473    1.517
 transfer_rs2pw                     451 10.6    0.005    0.006    1.249    1.496
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.471    1.483
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.359    1.451
 mp_waitany                       17072 13.8    1.114    1.379    1.114    1.379
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    0.919    1.370
 rs_gather_matrices                 110 12.3    0.137    0.150    0.877    1.350
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.268    1.276
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="204", plot="h2o_128_md", label="(8n/4r/3t)", y=59.227000, yerr=0.000000
PlotPoint: name="205", plot="h2o_128_md_mem", label="(8n/4r/3t)", y=630.000000, yerr=8.301150
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/12/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    601.317074E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               4916280       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     200.2
 marketing flops                    15.646302E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             740.179968E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  937080
 MPI messages size (bytes):
  total size                       523.723932E+09
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     558.889250E+03
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                 264                  2162688
      8192 < size <=    32768              304932               8165326848
     32768 < size <=   131072              110640               6338641920
    131072 < size <=  4194304              489498             400769458320
   4194304 < size <= 16777216               24750             108449092400
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66419.
 MP_Allreduce         9774                    603.
 MP_Sync                52
 MP_Alltoall          1496                5863162.
 MP_SendRecv          5060                  43184.
 MP_ISendRecv         5060                  43184.
 MP_Wait             20042
 MP_ISend            13376                 163145.
 MP_IRecv            13376                 163145.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.010    0.030   65.762   65.764
 qs_mol_dyn_low                       1  2.0    0.003    0.004   65.543   65.553
 qs_forces                           11  3.9    0.003    0.003   65.469   65.477
 qs_energies                         11  4.9    0.001    0.001   62.081   62.091
 scf_env_do_scf                      11  5.9    0.000    0.001   53.766   53.769
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   42.200   42.200
 velocity_verlet                     10  3.0    0.002    0.002   37.221   37.223
 dbcsr_multiply_generic            2055 12.4    0.116    0.121   30.674   30.880
 qs_scf_new_mos                      99  7.5    0.001    0.001   27.498   27.621
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   27.497   27.620
 ot_scf_mini                         99  9.5    0.002    0.003   25.809   25.904
 multiply_cannon                   2055 13.4    0.237    0.254   23.428   24.689
 multiply_cannon_loop              2055 14.4    1.412    1.451   22.063   22.720
 ot_mini                             99 10.5    0.001    0.001   14.760   14.880
 multiply_cannon_multrec          24660 15.4    4.066    6.924   13.005   14.176
 rebuild_ks_matrix                  110  8.3    0.000    0.000   12.028   12.144
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   12.027   12.144
 init_scf_loop                       11  6.9    0.000    0.000   11.526   11.527
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.628   10.730
 qs_ot_get_derivative                99 11.5    0.001    0.001   10.569   10.669
 prepare_preconditioner              11  7.9    0.000    0.000    9.792    9.811
 make_preconditioner                 11  8.9    0.000    0.000    9.792    9.811
 dbcsr_mm_accdrv_process          52304 16.0    7.756    9.159    8.782    9.780
 make_full_inverse_cholesky          11  9.9    0.000    0.000    8.002    9.477
 sum_up_and_integrate               110 10.3    0.001    0.002    6.261    6.274
 integrate_v_rspace                 110 11.3    0.003    0.003    6.235    6.247
 mp_waitall_1                    121746 16.5    4.303    6.172    4.303    6.172
 qs_ot_get_p                        110 10.4    0.001    0.001    5.897    6.043
 make_m2s                          4110 13.4    0.059    0.061    5.623    5.925
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.880    5.889
 calculate_rho_elec                 110  8.6    0.077    0.081    5.880    5.889
 make_images                       4110 14.4    0.571    0.695    5.481    5.778
 init_scf_run                        11  5.9    0.000    0.001    5.586    5.586
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.585    5.586
 cp_fm_upper_to_full                 70 14.2    3.367    4.883    3.367    4.883
 ot_diis_step                        99 11.5    0.011    0.011    4.136    4.137
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.045    4.123
 apply_single                       110 13.6    0.000    0.000    4.045    4.123
 qs_ot_p2m_diag                      48 11.0    0.055    0.064    4.023    4.039
 pw_transfer                       1331 11.6    0.065    0.072    3.797    3.830
 dbcsr_complete_redistribute        325 12.2    0.413    0.467    2.649    3.789
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.691    3.726
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    3.485    3.533
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.459    3.459
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    3.357    3.412
 grid_integrate_task_list           110 12.3    3.301    3.403    3.301    3.403
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    2.132    3.265
 density_rs2pw                      110  9.6    0.004    0.004    2.996    3.211
 fft_wrap_pw1pw2_140                451 13.1    0.530    0.543    3.164    3.202
 make_images_data                  4110 15.4    0.048    0.052    2.821    3.166
 hybrid_alltoall_any               4261 16.3    0.122    0.455    2.344    3.094
 multiply_cannon_metrocomm3       24660 15.4    0.037    0.038    1.330    3.031
 calculate_dm_sparse                110  9.5    0.001    0.001    2.990    3.025
 wfi_extrapolate                     11  7.9    0.001    0.001    3.010    3.010
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.919    2.928
 cp_fm_diag_elpa_base                48 14.0    2.771    2.833    2.916    2.927
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    1.782    2.899
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.869    2.870
 mp_alltoall_i22                    605 13.7    1.666    2.856    1.666    2.856
 cp_fm_cholesky_invert               11 10.9    2.598    2.606    2.598    2.606
 multiply_cannon_sync_h2d         24660 15.4    2.376    2.548    2.376    2.548
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.502    2.539
 fft3d_ps                          1111 14.6    1.151    1.191    2.518    2.538
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.478    2.482
 acc_transpose_blocks             24660 15.4    0.111    0.114    2.393    2.475
 grid_collocate_task_list           110  9.6    2.264    2.395    2.264    2.395
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    2.235    2.243
 potential_pw2rs                    110 12.3    0.012    0.013    2.072    2.075
 mp_alltoall_d11v                  2046 13.8    1.764    2.039    1.764    2.039
 qs_ot_get_orbitals                  99 10.5    0.001    0.001    1.791    1.825
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.610    1.706
 cp_fm_cholesky_decompose            22 10.9    1.639    1.683    1.639    1.683
 mp_allgather_i34                  2055 14.4    0.455    1.665    0.455    1.665
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.639    1.652
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.545    1.561
 mp_sum_l                          6594 12.7    0.933    1.550    0.933    1.550
 multiply_cannon_metrocomm4       20550 15.4    0.059    0.061    0.863    1.496
 acc_transpose_blocks_sync        73980 16.4    1.386    1.470    1.386    1.470
 jit_kernel_multiply                  8 15.4    0.679    1.431    0.679    1.431
 transfer_rs2pw                     451 10.6    0.005    0.006    1.185    1.416
 mp_irecv_dv                      62702 16.1    0.760    1.413    0.760    1.413
 mp_waitany                       13376 13.8    1.029    1.319    1.029    1.319
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="206", plot="h2o_128_md", label="(8n/3r/4t)", y=65.764000, yerr=0.000000
PlotPoint: name="207", plot="h2o_128_md_mem", label="(8n/3r/4t)", y=700.727273, yerr=11.160734
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/13/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                    807.299199E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1438408       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     684.2
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank             851.464192E+06
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  197280
 MPI messages size (bytes):
  total size                       339.125567E+09
  min size                           0.000000E+00
  max size                          13.107200E+06
  average size                       1.719006E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 132                  4325376
     32768 < size <=   131072               88656              11620319232
    131072 < size <=  4194304               89424             117209825280
   4194304 < size <= 16777216               17616             210291069504
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         7346                     33.
 MP_Alltoall          8043                 263767.
 MP_ISend            32836                 654203.
 MP_IRecv            32836                 654587.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3473                  66417.
 MP_Allreduce         9774                    644.
 MP_Sync                52
 MP_Alltoall          1496                8504061.
 MP_SendRecv          3300                  54848.
 MP_ISendRecv         3300                  54848.
 MP_Wait             13926
 MP_ISend             9240                 278857.
 MP_IRecv             9240                 278857.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.011    0.029   54.688   54.690
 qs_mol_dyn_low                       1  2.0    0.003    0.004   54.464   54.474
 qs_forces                           11  3.9    0.003    0.003   54.399   54.399
 qs_energies                         11  4.9    0.001    0.001   50.765   50.772
 scf_env_do_scf                      11  5.9    0.000    0.001   42.636   42.636
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   34.966   34.967
 velocity_verlet                     10  3.0    0.002    0.002   30.909   30.912
 dbcsr_multiply_generic            2055 12.4    0.107    0.109   23.092   23.228
 qs_scf_new_mos                      99  7.5    0.001    0.001   20.448   20.501
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   20.447   20.500
 ot_scf_mini                         99  9.5    0.002    0.002   19.197   19.218
 multiply_cannon                   2055 13.4    0.238    0.249   17.607   19.005
 multiply_cannon_loop              2055 14.4    0.605    0.626   16.335   16.654
 rebuild_ks_matrix                  110  8.3    0.000    0.000   11.620   11.641
 qs_ks_build_kohn_sham_matrix       110  9.3    0.012    0.013   11.620   11.641
 ot_mini                             99 10.5    0.001    0.001   10.653   10.667
 qs_ks_update_qs_env                110  7.6    0.001    0.001   10.339   10.358
 multiply_cannon_multrec           8220 15.4    3.189    4.345    7.586    8.534
 init_scf_loop                       11  6.9    0.000    0.000    7.626    7.629
 mp_waitall_1                    103326 16.6    5.771    7.266    5.771    7.266
 qs_ot_get_derivative                99 11.5    0.001    0.001    6.984    7.003
 sum_up_and_integrate               110 10.3    0.001    0.002    6.184    6.195
 integrate_v_rspace                 110 11.3    0.003    0.003    6.157    6.168
 prepare_preconditioner              11  7.9    0.000    0.000    5.983    5.988
 make_preconditioner                 11  8.9    0.000    0.000    5.983    5.988
 qs_rho_update_rho_low              110  7.6    0.001    0.001    5.951    5.968
 calculate_rho_elec                 110  8.6    0.112    0.113    5.950    5.967
 make_full_inverse_cholesky          11  9.9    0.000    0.000    5.570    5.643
 dbcsr_mm_accdrv_process          17442 15.9    3.179    4.151    4.258    5.184
 init_scf_run                        11  5.9    0.000    0.001    5.024    5.024
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.023    5.023
 qs_ot_get_p                        110 10.4    0.001    0.001    4.605    4.630
 multiply_cannon_metrocomm3        8220 15.4    0.018    0.019    2.980    4.575
 make_m2s                          4110 13.4    0.038    0.039    4.155    4.450
 make_images                       4110 14.4    0.644    0.701    4.025    4.321
 pw_transfer                       1331 11.6    0.066    0.070    4.049    4.057
 fft_wrap_pw1pw2                   1111 12.6    0.008    0.008    3.941    3.952
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    3.709    3.746
 apply_single                       110 13.6    0.000    0.000    3.709    3.746
 ot_diis_step                        99 11.5    0.012    0.012    3.648    3.648
 grid_integrate_task_list           110 12.3    3.374    3.525    3.374    3.525
 fft_wrap_pw1pw2_140                451 13.1    0.719    0.731    3.427    3.440
 qs_ot_p2m_diag                      48 11.0    0.081    0.084    3.264    3.267
 density_rs2pw                      110  9.6    0.004    0.004    2.992    3.147
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    2.928    2.928
 cp_fm_cholesky_invert               11 10.9    2.862    2.866    2.862    2.866
 wfi_extrapolate                     11  7.9    0.001    0.001    2.687    2.687
 hybrid_alltoall_any               4261 16.3    0.198    0.845    2.194    2.657
 qs_energies_init_hamiltonians       11  5.9    0.001    0.001    2.655    2.656
 make_images_data                  4110 15.4    0.040    0.046    2.234    2.602
 grid_collocate_task_list           110  9.6    2.364    2.525    2.364    2.525
 calculate_dm_sparse                110  9.5    0.001    0.001    2.475    2.524
 fft3d_ps                          1111 14.6    1.288    1.297    2.484    2.503
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.482    2.483
 multiply_cannon_sync_h2d          8220 15.4    2.381    2.449    2.381    2.449
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.426    2.434
 cp_fm_diag_elpa_base                48 14.0    2.370    2.397    2.424    2.432
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.241    2.242
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.129    2.143
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    1.986    2.012
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    1.782    1.994
 potential_pw2rs                    110 12.3    0.015    0.015    1.991    1.994
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    1.896    1.904
 mp_alltoall_d11v                  2046 13.8    1.588    1.778    1.588    1.778
 cp_fm_cholesky_decompose            22 10.9    1.682    1.697    1.682    1.697
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.609    1.612
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    1.489    1.604
 mp_allgather_i34                  2055 14.4    0.442    1.534    0.442    1.534
 dbcsr_complete_redistribute        325 12.2    0.556    0.587    1.408    1.497
 cp_dbcsr_plus_fm_fm_t_native        22  8.9    0.001    0.001    1.425    1.439
 acc_transpose_blocks              8220 15.4    0.041    0.042    1.361    1.389
 qs_create_task_list                 11  7.9    0.001    0.001    1.202    1.298
 generate_qs_task_list               11  8.9    0.373    0.440    1.201    1.297
 transfer_rs2pw                     451 10.6    0.005    0.005    1.094    1.274
 mp_waitany                        9240 13.8    1.048    1.241    1.048    1.241
 multiply_cannon_metrocomm1        8220 15.4    0.022    0.022    0.747    1.182
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.127    1.153
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="208", plot="h2o_128_md", label="(8n/2r/6t)", y=54.690000, yerr=0.000000
PlotPoint: name="209", plot="h2o_128_md_mem", label="(8n/2r/6t)", y=805.727273, yerr=11.070772
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/14/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32         184415158272       0.0%      0.0%    100.0%
 flops     9 x     9 x    32         269180485632       0.0%      0.0%    100.0%
 flops     9 x    22 x    32         349395425280       0.0%      0.0%    100.0%
 flops    22 x     9 x    32         350042406912       0.0%      0.0%    100.0%
 flops    22 x    22 x    32         453581815808       0.0%      0.0%    100.0%
 flops    32 x    32 x     9         465064427520       0.0%      0.0%    100.0%
 flops    32 x    32 x    22         568412078080       0.0%      0.0%    100.0%
 flops     9 x    32 x    32         572195340288       0.0%      0.0%    100.0%
 flops    22 x    32 x    32         699349860352       0.0%      0.0%    100.0%
 flops     9 x    32 x     9        1735942275072       0.0%      0.0%    100.0%
 flops    22 x    32 x     9        2216407818240       0.0%      0.0%    100.0%
 flops     9 x    32 x    22        2216407818240       0.0%      0.0%    100.0%
 flops    22 x    32 x    22        2803661053952       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        12.884056E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.612391E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                          984178160       0.0%      0.0%    100.0%
 number of processed stacks               1464624       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     672.0
 marketing flops                    15.646297E+12
 -------------------------------------------------------------------------------
 # multiplications                           2055
 max memory usage/rank               1.387241E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   82200
 MPI messages size (bytes):
  total size                       297.640985E+09
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       3.620936E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                  44                  1441792
     32768 < size <=   131072               18560               2432696320
    131072 < size <=  4194304               54216              84915781632
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            8808             210291069504
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3462                  67098.
 MP_Allreduce         9752                    812.
 MP_Sync                52
 MP_Alltoall          1474               16505187.
 MP_SendRecv          2310                 360267.
 MP_ISendRecv         2310                 360267.
 MP_Wait              5214
 MP_ISend             2420                1187840.
 MP_IRecv             2420                1187840.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.014    0.035   87.404   87.405
 qs_mol_dyn_low                       1  2.0    0.003    0.004   87.076   87.087
 qs_forces                           11  3.9    0.003    0.003   87.005   87.006
 qs_energies                         11  4.9    0.001    0.002   82.893   82.896
 scf_env_do_scf                      11  5.9    0.000    0.001   72.877   72.877
 velocity_verlet                     10  3.0    0.002    0.005   55.721   55.727
 scf_env_do_scf_inner_loop           99  6.5    0.003    0.007   44.502   44.503
 dbcsr_multiply_generic            2055 12.4    0.134    0.148   29.282   29.385
 init_scf_loop                       11  6.9    0.000    0.000   28.305   28.307
 qs_scf_new_mos                      99  7.5    0.001    0.001   26.633   26.668
 qs_scf_loop_do_ot                   99  8.5    0.001    0.001   26.632   26.667
 prepare_preconditioner              11  7.9    0.000    0.000   26.171   26.179
 make_preconditioner                 11  8.9    0.000    0.000   26.171   26.179
 make_full_inverse_cholesky          11  9.9    0.000    0.000   20.334   25.659
 ot_scf_mini                         99  9.5    0.002    0.002   24.841   24.866
 multiply_cannon                   2055 13.4    0.331    0.349   22.296   23.082
 multiply_cannon_loop              2055 14.4    0.812    0.833   20.531   20.826
 cp_fm_upper_to_full                 70 14.2   12.755   18.336   12.755   18.336
 rebuild_ks_matrix                  110  8.3    0.000    0.001   14.034   14.069
 qs_ks_build_kohn_sham_matrix       110  9.3    0.013    0.013   14.033   14.068
 ot_mini                             99 10.5    0.001    0.001   13.981   14.009
 qs_ks_update_qs_env                110  7.6    0.001    0.001   12.735   12.766
 dbcsr_complete_redistribute        325 12.2    1.001    1.030    7.376   10.603
 multiply_cannon_multrec           8220 15.4    4.055    4.215    9.753    9.892
 copy_fm_to_dbcsr                   174 11.2    0.001    0.001    6.379    9.599
 qs_ot_get_derivative                99 11.5    0.001    0.001    9.423    9.451
 transfer_fm_to_dbcsr                11  9.9    0.000    0.000    5.821    9.003
 mp_alltoall_i22                    605 13.7    5.457    8.695    5.457    8.695
 mp_waitall_1                     84994 16.7    7.347    8.164    7.347    8.164
 qs_rho_update_rho_low              110  7.6    0.001    0.001    7.464    7.500
 calculate_rho_elec                 110  8.6    0.222    0.223    7.464    7.500
 sum_up_and_integrate               110 10.3    0.002    0.002    6.903    6.916
 integrate_v_rspace                 110 11.3    0.004    0.004    6.875    6.887
 pw_transfer                       1331 11.6    0.075    0.075    5.798    5.805
 dbcsr_mm_accdrv_process          11614 15.7    3.887    4.121    5.550    5.767
 init_scf_run                        11  5.9    0.000    0.001    5.745    5.745
 scf_env_initial_rho_setup           11  6.9    0.001    0.001    5.745    5.745
 make_m2s                          4110 13.4    0.043    0.043    5.273    5.736
 fft_wrap_pw1pw2                   1111 12.6    0.009    0.009    5.682    5.689
 qs_ot_get_p                        110 10.4    0.001    0.001    5.596    5.631
 make_images                       4110 14.4    0.880    0.933    5.083    5.544
 apply_preconditioner_dbcsr         110 12.6    0.000    0.000    4.704    5.174
 apply_single                       110 13.6    0.000    0.000    4.704    5.174
 cp_fm_cholesky_invert               11 10.9    5.105    5.109    5.105    5.109
 multiply_cannon_metrocomm3        8220 15.4    0.019    0.019    4.763    5.056
 fft_wrap_pw1pw2_140                451 13.1    1.367    1.372    4.992    5.002
 ot_diis_step                        99 11.5    0.015    0.015    4.542    4.542
 qs_ot_p2m_diag                      48 11.0    0.151    0.156    4.046    4.051
 density_rs2pw                      110  9.6    0.004    0.004    3.934    3.953
 grid_integrate_task_list           110 12.3    3.673    3.749    3.673    3.749
 qs_energies_init_hamiltonians       11  5.9    0.003    0.003    3.663    3.664
 cp_dbcsr_syevd                      48 12.0    0.003    0.003    3.525    3.526
 hybrid_alltoall_any               4261 16.3    0.263    0.563    2.784    3.517
 qs_ot_get_derivative_taylor         52 13.0    0.001    0.001    2.997    3.458
 fft3d_ps                          1111 14.6    1.880    1.890    3.387    3.391
 make_images_data                  4110 15.4    0.045    0.048    2.749    3.349
 wfi_extrapolate                     11  7.9    0.001    0.001    3.330    3.330
 multiply_cannon_sync_h2d          8220 15.4    3.148    3.178    3.148    3.178
 calculate_dm_sparse                110  9.5    0.001    0.001    3.153    3.175
 cp_fm_diag_elpa                     48 13.0    0.000    0.000    2.936    2.936
 cp_fm_diag_elpa_base                48 14.0    2.393    2.600    2.933    2.933
 cp_dbcsr_sm_fm_multiply             37  9.5    0.001    0.001    2.914    2.917
 grid_collocate_task_list           110  9.6    2.665    2.688    2.665    2.688
 qs_ot_get_derivative_diag           47 12.0    0.001    0.001    2.630    2.646
 potential_pw2rs                    110 12.3    0.021    0.021    2.436    2.439
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    2.423    2.436
 calculate_first_density_matrix       1  7.0    0.000    0.000    2.300    2.300
 qs_env_update_s_mstruct             11  6.9    0.001    0.001    2.189    2.237
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    2.091    2.140
 mp_alltoall_d11v                  2046 13.8    2.026    2.087    2.026    2.087
 cp_fm_cholesky_decompose            22 10.9    1.966    1.994    1.966    1.994
 qs_create_task_list                 11  7.9    0.001    0.001    1.879    1.924
 generate_qs_task_list               11  8.9    0.731    0.786    1.879    1.924
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    1.896    1.899
 copy_dbcsr_to_fm                   151 11.3    0.003    0.003    1.732    1.767
 jit_kernel_multiply                 10 15.1    1.467    1.761    1.467    1.761
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="210", plot="h2o_128_md", label="(8n/1r/12t)", y=87.405000, yerr=0.000000
PlotPoint: name="211", plot="h2o_128_md_mem", label="(8n/1r/12t)", y=1258.000000, yerr=52.483764
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/15/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420241154048       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528896499712       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514757E+12       0.0%      0.0%    100.0%
 flops max/rank                      1.094965E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755939872       0.0%      0.0%    100.0%
 number of processed stacks              11950464       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     565.3
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             629.936128E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                10348896
 MPI messages size (bytes):
  total size                         4.491514E+12
  min size                           0.000000E+00
  max size                           4.537280E+06
  average size                     434.009000E+03
 MPI breakdown and total messages size (bytes):
             size <=      128               65736                        0
       128 < size <=     8192                1232                 10092544
      8192 < size <=    32768             3576680              95640223744
     32768 < size <=   131072             1294784              74079797248
    131072 < size <=  4194304             5148576            3175955098256
   4194304 < size <= 16777216              261888            1145794321408
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  57905.
 MP_Allreduce        11059                    797.
 MP_Sync                87
 MP_Alltoall          2226                2686109.
 MP_SendRecv         24320                  18752.
 MP_ISendRecv        24320                  18752.
 MP_Wait             42476
 MP_ISend            16020                 108028.
 MP_IRecv            16020                 108028.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.018    0.037  207.149  207.155
 qs_mol_dyn_low                       1  2.0    0.004    0.006  206.517  206.531
 qs_forces                           11  3.9    0.004    0.005  206.420  206.422
 qs_energies                         11  4.9    0.002    0.002  200.837  200.852
 scf_env_do_scf                      11  5.9    0.001    0.001  184.217  184.221
 scf_env_do_scf_inner_loop          117  6.6    0.005    0.012  163.817  163.819
 dbcsr_multiply_generic            2507 12.6    0.178    0.182  125.553  126.124
 qs_scf_new_mos                     117  7.6    0.001    0.001  124.386  124.664
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001  124.385  124.664
 velocity_verlet                     10  3.0    0.001    0.002  123.466  123.469
 ot_scf_mini                        117  9.6    0.003    0.003  117.615  117.848
 multiply_cannon                   2507 13.6    0.237    0.247  101.391  102.632
 multiply_cannon_loop              2507 14.6    2.381    2.454   99.268  100.316
 ot_mini                            117 10.6    0.001    0.001   66.097   66.322
 multiply_cannon_multrec          60168 15.6   31.742   33.437   41.508   43.380
 qs_ot_get_derivative               117 11.6    0.001    0.002   41.441   41.680
 rebuild_ks_matrix                  128  8.3    0.001    0.001   33.497   33.736
 qs_ks_build_kohn_sham_matrix       128  9.3    0.015    0.017   33.496   33.735
 mp_waitall_1                    267128 16.5   28.746   31.593   28.746   31.593
 qs_ks_update_qs_env                128  7.6    0.001    0.001   30.089   30.295
 qs_ot_get_p                        128 10.4    0.001    0.001   29.295   29.551
 multiply_cannon_sync_h2d         60168 15.6   26.488   28.408   26.488   28.408
 apply_preconditioner_dbcsr         128 12.6    0.000    0.001   24.252   25.131
 apply_single                       128 13.6    0.001    0.001   24.252   25.131
 ot_diis_step                       117 11.6    0.008    0.008   24.423   24.424
 qs_ot_p2m_diag                      83 11.4    0.079    0.092   22.708   22.802
 init_scf_loop                       11  6.9    0.000    0.001   20.327   20.329
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002   19.972   20.152
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   19.911   19.912
 multiply_cannon_metrocomm3       60168 15.6    0.116    0.123   16.021   17.879
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   16.949   16.981
 cp_fm_diag_elpa_base                83 14.4   16.876   16.909   16.944   16.978
 prepare_preconditioner              11  7.9    0.000    0.000   15.742   15.784
 make_preconditioner                 11  8.9    0.000    0.000   15.742   15.784
 make_full_inverse_cholesky          11  9.9    0.000    0.000   15.001   15.181
 make_m2s                          5014 13.6    0.105    0.113   13.904   14.349
 make_images                       5014 14.6    0.396    0.414   13.717   14.173
 sum_up_and_integrate               128 10.3    0.002    0.004   14.010   14.025
 integrate_v_rspace                 128 11.3    0.003    0.004   13.951   13.969
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.189   13.318
 calculate_rho_elec                 128  8.7    0.045    0.064   13.188   13.317
 init_scf_run                        11  5.9    0.000    0.001   12.485   12.486
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.485   12.486
 mp_sum_l                          7950 12.9    8.998   10.166    8.998   10.166
 dbcsr_mm_accdrv_process         124484 16.2    4.714    4.943    9.331    9.952
 wfi_extrapolate                     11  7.9    0.001    0.001    9.033    9.033
 cp_fm_cholesky_invert               11 10.9    8.988    8.996    8.988    8.996
 calculate_dm_sparse                128  9.5    0.001    0.001    8.668    8.781
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    8.177    8.249
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    8.077    8.207
 pw_transfer                       1547 11.6    0.074    0.087    7.652    7.837
 make_images_data                  5014 15.6    0.066    0.073    6.801    7.758
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    7.450    7.632
 multiply_cannon_metrocomm1       60168 15.6    0.092    0.096    6.178    7.547
 grid_integrate_task_list           128 12.3    7.091    7.482    7.091    7.482
 density_rs2pw                      128  9.7    0.006    0.006    6.818    7.315
 hybrid_alltoall_any               5200 16.5    0.292    2.259    5.977    7.260
 fft_wrap_pw1pw2_140                523 13.2    0.861    0.909    6.527    6.699
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.004    6.623    6.634
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.763    5.868
 fft3d_ps                          1291 14.7    2.205    2.875    5.450    5.769
 mp_alltoall_d11v                  2415 14.1    4.280    5.449    4.280    5.449
 grid_collocate_task_list           128  9.7    4.838    5.249    4.838    5.249
 cp_fm_cholesky_decompose            22 10.9    4.613    4.627    4.613    4.627
 potential_pw2rs                    128 12.3    0.009    0.010    4.513    4.540
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="400", plot="h2o_256_md", label="(8n/12r/1t)", y=207.155000, yerr=0.000000
PlotPoint: name="401", plot="h2o_256_md_mem", label="(8n/12r/1t)", y=597.272727, yerr=5.395131
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/16/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.183246E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               5975232       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1130.7
 marketing flops                   144.580175E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             843.816960E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 2406720
 MPI messages size (bytes):
  total size                         4.100942E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.703955E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               14916                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768               70860               2317615104
     32768 < size <=   131072              722992              55511613440
    131072 < size <=  4194304             1375664            1398181724160
   4194304 < size <= 16777216              154704            1463834332048
  16777216 < size                           67584            1181116006400
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  58357.
 MP_Allreduce        11058                    960.
 MP_Sync                87
 MP_Alltoall          1969                4906255.
 MP_SendRecv         12032                  47072.
 MP_ISendRecv        12032                  47072.
 MP_Wait             25916
 MP_ISend            11748                 212467.
 MP_IRecv            11748                 212467.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.024    0.055  189.248  189.249
 qs_mol_dyn_low                       1  2.0    0.003    0.004  188.866  188.896
 qs_forces                           11  3.9    0.004    0.005  188.784  188.786
 qs_energies                         11  4.9    0.001    0.002  182.064  182.074
 scf_env_do_scf                      11  5.9    0.001    0.001  165.618  165.628
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.009  133.254  133.257
 velocity_verlet                     10  3.0    0.001    0.002  118.587  118.588
 dbcsr_multiply_generic            2507 12.6    0.252    0.259   97.740   98.966
 qs_scf_new_mos                     117  7.6    0.001    0.001   94.652   95.224
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   94.651   95.223
 ot_scf_mini                        117  9.6    0.004    0.004   89.841   90.564
 multiply_cannon                   2507 13.6    0.502    0.558   77.610   82.587
 multiply_cannon_loop              2507 14.6    1.573    1.642   74.242   76.622
 ot_mini                            117 10.6    0.001    0.001   50.490   51.136
 mp_waitall_1                    214728 16.6   24.010   39.175   24.010   39.175
 multiply_cannon_multrec          30084 15.6   21.103   26.038   31.750   37.070
 rebuild_ks_matrix                  128  8.3    0.001    0.001   32.281   32.995
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.019   32.280   32.995
 init_scf_loop                       11  6.9    0.000    0.000   32.275   32.276
 qs_ks_update_qs_env                128  7.6    0.001    0.001   29.039   29.683
 qs_ot_get_derivative               117 11.6    0.001    0.002   28.601   29.307
 multiply_cannon_metrocomm3       30084 15.6    0.099    0.106   15.609   29.068
 prepare_preconditioner              11  7.9    0.000    0.000   27.979   28.042
 make_preconditioner                 11  8.9    0.000    0.000   27.979   28.042
 make_full_inverse_cholesky          11  9.9    0.000    0.000   26.662   27.182
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   22.007   23.231
 apply_single                       128 13.6    0.001    0.001   22.007   23.230
 qs_ot_get_p                        128 10.4    0.001    0.001   21.371   22.187
 ot_diis_step                       117 11.6    0.014    0.015   21.714   21.716
 multiply_cannon_sync_h2d         30084 15.6   18.177   21.217   18.177   21.217
 qs_ot_p2m_diag                      83 11.4    0.188    0.217   16.434   16.468
 cp_fm_cholesky_invert               11 10.9   16.304   16.316   16.304   16.316
 make_m2s                          5014 13.6    0.086    0.092   14.081   15.762
 make_images                       5014 14.6    1.155    1.362   13.864   15.547
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   15.232   15.232
 sum_up_and_integrate               128 10.3    0.002    0.004   13.993   14.023
 integrate_v_rspace                 128 11.3    0.003    0.004   13.933   13.965
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.384   13.416
 calculate_rho_elec                 128  8.7    0.087    0.103   13.383   13.415
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   12.094   12.128
 cp_fm_diag_elpa_base                83 14.4   11.841   11.937   12.089   12.118
 qs_ot_get_derivative_diag           77 12.4    0.002    0.003   11.232   11.737
 init_scf_run                        11  5.9    0.000    0.001   11.665   11.666
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   11.664   11.666
 multiply_cannon_metrocomm4       27577 15.6    0.100    0.115    3.818   11.005
 dbcsr_mm_accdrv_process          62242 16.2    5.496    6.270   10.094   10.604
 mp_irecv_dv                      69486 16.3    3.618   10.600    3.618   10.600
 make_images_data                  5014 15.6    0.066    0.076    8.272   10.313
 hybrid_alltoall_any               5200 16.5    0.346    1.571    7.042    9.917
 pw_transfer                       1547 11.6    0.084    0.095    8.586    8.645
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.011    8.364    8.423
 wfi_extrapolate                     11  7.9    0.001    0.001    8.337    8.337
 density_rs2pw                      128  9.7    0.006    0.006    7.110    7.963
 grid_integrate_task_list           128 12.3    7.145    7.584    7.145    7.584
 fft_wrap_pw1pw2_140                523 13.2    0.935    0.956    7.391    7.463
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    6.283    7.142
 cp_fm_cholesky_decompose            22 10.9    6.838    6.911    6.838    6.911
 calculate_dm_sparse                128  9.5    0.001    0.001    6.466    6.629
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.325    6.333
 mp_sum_l                          7950 12.9    4.188    6.199    4.188    6.199
 fft3d_ps                          1291 14.7    2.827    3.003    5.920    5.971
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    5.295    5.473
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    5.361    5.439
 grid_collocate_task_list           128  9.7    4.996    5.373    4.996    5.373
 mp_alltoall_d11v                  2415 14.1    4.157    5.284    4.157    5.284
 mp_allgather_i34                  2507 14.6    1.895    4.859    1.895    4.859
 potential_pw2rs                    128 12.3    0.015    0.017    4.476    4.489
 dbcsr_complete_redistribute        395 12.7    0.858    0.984    3.253    4.106
 mp_sum_d                          4467 12.1    2.607    3.963    2.607    3.963
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="402", plot="h2o_256_md", label="(8n/6r/2t)", y=189.249000, yerr=0.000000
PlotPoint: name="403", plot="h2o_256_md_mem", label="(8n/6r/2t)", y=803.909091, yerr=2.353123
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/17/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.928533E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               3984192       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1695.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank             954.236928E+06
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                 1042912
 MPI messages size (bytes):
  total size                         2.716210E+12
  min size                           0.000000E+00
  max size                          26.214400E+06
  average size                       2.604448E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6424                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 264                  8650752
     32768 < size <=   131072              281856              36943429632
    131072 < size <=  4194304              660064             996105256960
   4194304 < size <= 16777216               65632             931530938576
  16777216 < size                           28672             751619276800
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  58351.
 MP_Allreduce        11057                   1000.
 MP_Sync                87
 MP_Alltoall          1712                9388896.
 MP_SendRecv          7936                  75008.
 MP_ISendRecv         7936                  75008.
 MP_Wait             21820
 MP_ISend            11748                 275205.
 MP_IRecv            11748                 275205.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.019    0.042  176.434  176.435
 qs_mol_dyn_low                       1  2.0    0.004    0.004  175.969  175.981
 qs_forces                           11  3.9    0.005    0.013  175.871  175.873
 qs_energies                         11  4.9    0.002    0.007  169.340  169.350
 scf_env_do_scf                      11  5.9    0.001    0.002  153.810  153.810
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  118.734  118.735
 velocity_verlet                     10  3.0    0.004    0.006  112.050  112.052
 dbcsr_multiply_generic            2507 12.6    0.185    0.189   81.871   83.042
 qs_scf_new_mos                     117  7.6    0.001    0.001   81.479   81.818
 qs_scf_loop_do_ot                  117  8.6    0.001    0.002   81.478   81.818
 ot_scf_mini                        117  9.6    0.004    0.005   77.246   77.640
 multiply_cannon                   2507 13.6    0.504    0.521   62.165   66.726
 multiply_cannon_loop              2507 14.6    1.134    1.201   59.048   62.010
 ot_mini                            117 10.6    0.001    0.001   42.398   42.796
 init_scf_loop                       11  6.9    0.001    0.006   34.979   34.980
 mp_waitall_1                    170520 16.6   24.562   33.857   24.562   33.857
 prepare_preconditioner              11  7.9    0.000    0.000   30.959   31.009
 make_preconditioner                 11  8.9    0.000    0.002   30.959   31.009
 rebuild_ks_matrix                  128  8.3    0.001    0.001   30.294   30.824
 qs_ks_build_kohn_sham_matrix       128  9.3    0.016    0.023   30.293   30.823
 make_full_inverse_cholesky          11  9.9    0.000    0.000   28.619   30.000
 qs_ks_update_qs_env                128  7.6    0.001    0.001   27.309   27.792
 multiply_cannon_multrec          20056 15.6   13.065   16.877   22.405   26.228
 multiply_cannon_metrocomm3       20056 15.6    0.061    0.065   15.113   24.892
 qs_ot_get_derivative               117 11.6    0.002    0.002   22.922   23.317
 qs_ot_get_p                        128 10.4    0.001    0.002   20.147   20.661
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   19.488   20.538
 apply_single                       128 13.6    0.001    0.001   19.488   20.537
 ot_diis_step                       117 11.6    0.018    0.019   19.369   19.370
 make_m2s                          5014 13.6    0.078    0.084   14.691   16.083
 make_images                       5014 14.6    1.137    1.221   14.453   15.835
 qs_ot_p2m_diag                      83 11.4    0.265    0.272   15.704   15.713
 multiply_cannon_sync_h2d         20056 15.6   13.642   15.223   13.642   15.223
 cp_dbcsr_syevd                      83 12.4    0.005    0.005   14.637   14.638
 cp_fm_cholesky_invert               11 10.9   14.419   14.428   14.419   14.428
 sum_up_and_integrate               128 10.3    0.002    0.004   13.947   13.976
 integrate_v_rspace                 128 11.3    0.003    0.004   13.888   13.910
 qs_rho_update_rho_low              128  7.7    0.001    0.001   13.664   13.701
 calculate_rho_elec                 128  8.7    0.128    0.143   13.664   13.700
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   11.381   11.404
 cp_fm_diag_elpa_base                83 14.4   10.969   11.123   11.377   11.400
 make_images_data                  5014 15.6    0.063    0.073    8.824   10.603
 init_scf_run                        11  5.9    0.000    0.001   10.504   10.504
 scf_env_initial_rho_setup           11  6.9    0.001    0.003   10.504   10.504
 hybrid_alltoall_any               5200 16.5    0.445    2.014    7.689   10.213
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.104    9.395
 multiply_cannon_metrocomm4       17549 15.6    0.065    0.073    3.473    9.262
 mp_irecv_dv                      50230 16.2    3.345    9.008    3.345    9.008
 pw_transfer                       1547 11.6    0.084    0.105    8.885    8.985
 dbcsr_mm_accdrv_process          41502 16.2    5.650    5.985    8.797    8.929
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.010    8.662    8.767
 grid_integrate_task_list           128 12.3    7.326    7.796    7.326    7.796
 fft_wrap_pw1pw2_140                523 13.2    1.060    1.096    7.640    7.749
 density_rs2pw                      128  9.7    0.006    0.006    7.036    7.641
 wfi_extrapolate                     11  7.9    0.001    0.001    7.419    7.419
 cp_fm_cholesky_decompose            22 10.9    7.364    7.412    7.364    7.412
 cp_fm_upper_to_full                105 14.8    5.762    7.274    5.762    7.274
 dbcsr_complete_redistribute        395 12.7    1.161    1.190    4.585    6.348
 fft3d_ps                          1291 14.7    2.820    3.050    5.980    6.066
 calculate_dm_sparse                128  9.5    0.001    0.001    5.842    5.933
 grid_collocate_task_list           128  9.7    5.156    5.666    5.156    5.666
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.438    5.443
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.615    5.253
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.402    5.163
 mp_alltoall_d11v                  2415 14.1    4.281    4.938    4.281    4.938
 mp_allgather_i34                  2507 14.6    1.622    4.872    1.622    4.872
 mp_sum_l                          7950 12.9    3.188    4.743    3.188    4.743
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.551    4.721
 potential_pw2rs                    128 12.3    0.020    0.021    4.384    4.398
 transfer_fm_to_dbcsr                11  9.9    0.017    0.022    2.321    4.068
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    3.919    3.945
 mp_alltoall_i22                    716 14.1    1.938    3.883    1.938    3.883
 qs_energies_init_hamiltonians       11  5.9    0.002    0.005    3.745    3.745
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.484    3.534
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="404", plot="h2o_256_md", label="(8n/4r/3t)", y=176.435000, yerr=0.000000
PlotPoint: name="405", plot="h2o_256_md_mem", label="(8n/4r/3t)", y=906.545455, yerr=9.296760
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/18/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1430456039424       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1962800054272       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1986255912960       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1992003932160       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2753958699008       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4454954827776       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5444944789504       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5492290093056       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6712799002624       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11613072052224       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15239176077312       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15239176077312       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19911132921856       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        94.233020E+12       0.0%      0.0%    100.0%
 flops max/rank                      4.387242E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6806383904       0.0%      0.0%    100.0%
 number of processed stacks               6026880       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    1129.3
 marketing flops                   145.651870E+12
 -------------------------------------------------------------------------------
 # multiplications                           2529
 max memory usage/rank               1.151717E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                 1153224
 MPI messages size (bytes):
  total size                         2.039489E+12
  min size                           0.000000E+00
  max size                          17.653760E+06
  average size                       1.768511E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                6996                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 396                  8650752
     32768 < size <=   131072              322096              36390305792
    131072 < size <=  4194304              721976             792118951936
   4194304 < size <= 16777216               70800             669922227920
  16777216 < size                           30960             541065216000
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4039                  57682.
 MP_Allreduce        11174                   1079.
 MP_Sync                88
 MP_Alltoall          1724               12509605.
 MP_SendRecv          5934                  75008.
 MP_ISendRecv         5934                  75008.
 MP_Wait             22612
 MP_ISend            15064                 244788.
 MP_IRecv            15064                 244788.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.012    0.031  190.673  190.675
 qs_mol_dyn_low                       1  2.0    0.003    0.004  190.309  190.322
 qs_forces                           11  3.9    0.004    0.004  190.165  190.179
 qs_energies                         11  4.9    0.002    0.002  183.088  183.096
 scf_env_do_scf                      11  5.9    0.001    0.001  166.482  166.495
 velocity_verlet                     10  3.0    0.001    0.002  125.392  125.396
 scf_env_do_scf_inner_loop          118  6.6    0.003    0.008  120.157  120.158
 qs_scf_new_mos                     118  7.6    0.001    0.001   83.264   83.556
 qs_scf_loop_do_ot                  118  8.6    0.001    0.001   83.263   83.555
 dbcsr_multiply_generic            2529 12.6    0.194    0.199   81.094   81.825
 ot_scf_mini                        118  9.6    0.003    0.004   78.605   78.936
 multiply_cannon                   2529 13.6    0.555    0.588   56.115   59.116
 multiply_cannon_loop              2529 14.6    1.854    1.932   52.392   54.146
 init_scf_loop                       11  6.9    0.000    0.000   46.206   46.207
 ot_mini                            118 10.6    0.001    0.001   43.365   43.688
 prepare_preconditioner              11  7.9    0.000    0.000   42.140   42.166
 make_preconditioner                 11  8.9    0.000    0.000   42.140   42.166
 make_full_inverse_cholesky          11  9.9    0.010    0.019   35.766   40.828
 multiply_cannon_multrec          30348 15.6   13.691   19.098   26.693   31.730
 rebuild_ks_matrix                  129  8.3    0.001    0.001   29.619   29.900
 qs_ks_build_kohn_sham_matrix       129  9.3    0.016    0.019   29.619   29.900
 mp_waitall_1                    149172 16.7   17.206   27.206   17.206   27.206
 qs_ks_update_qs_env                129  7.6    0.001    0.001   26.767   27.015
 qs_ot_get_derivative               118 11.6    0.002    0.002   23.651   23.982
 qs_ot_get_p                        129 10.4    0.001    0.001   21.255   21.626
 make_m2s                          5058 13.6    0.093    0.098   20.526   21.443
 make_images                       5058 14.6    1.933    2.250   20.213   21.126
 apply_preconditioner_dbcsr         129 12.6    0.000    0.001   19.081   19.585
 apply_single                       129 13.6    0.001    0.001   19.080   19.584
 ot_diis_step                       118 11.6    0.018    0.018   19.582   19.584
 cp_fm_upper_to_full                106 14.8   11.559   17.042   11.559   17.042
 qs_ot_p2m_diag                      84 11.4    0.347    0.394   16.939   16.990
 cp_fm_cholesky_invert               11 10.9   16.009   16.018   16.009   16.018
 cp_dbcsr_syevd                      84 12.4    0.005    0.006   15.505   15.506
 multiply_cannon_metrocomm3       30348 15.6    0.049    0.053    6.282   15.057
 sum_up_and_integrate               129 10.3    0.002    0.003   14.175   14.203
 integrate_v_rspace                 129 11.3    0.003    0.006   14.115   14.144
 qs_rho_update_rho_low              129  7.7    0.001    0.001   14.018   14.079
 calculate_rho_elec                 129  8.7    0.171    0.186   14.018   14.079
 dbcsr_mm_accdrv_process          62780 16.2    8.547    9.284   12.572   13.098
 dbcsr_complete_redistribute        397 12.7    1.490    1.598    9.148   13.012
 make_images_data                  5058 15.6    0.065    0.072   10.976   12.585
 cp_fm_diag_elpa                     84 13.4    0.000    0.000   12.154   12.168
 cp_fm_diag_elpa_base                84 14.4   11.167   11.516   12.146   12.159
 multiply_cannon_sync_h2d         30348 15.6   10.625   11.678   10.625   11.678
 copy_fm_to_dbcsr                   210 11.7    0.001    0.002    7.739   11.556
 hybrid_alltoall_any               5245 16.5    0.522    2.140    9.796   11.444
 init_scf_run                        11  5.9    0.000    0.001   10.704   10.705
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.703   10.705
 qs_ot_get_derivative_diag           78 12.4    0.002    0.003   10.098   10.349
 transfer_fm_to_dbcsr                11  9.9    0.002    0.007    6.353   10.105
 mp_alltoall_i22                    720 14.1    5.638    9.503    5.638    9.503
 pw_transfer                       1559 11.6    0.086    0.103    9.344    9.437
 fft_wrap_pw1pw2                   1301 12.7    0.010    0.011    9.117    9.216
 fft_wrap_pw1pw2_140                527 13.2    1.257    1.291    8.081    8.197
 grid_integrate_task_list           129 12.3    7.595    7.930    7.595    7.930
 cp_fm_cholesky_decompose            22 10.9    7.476    7.585    7.476    7.585
 wfi_extrapolate                     11  7.9    0.001    0.001    7.530    7.530
 density_rs2pw                      129  9.7    0.005    0.006    7.003    7.459
 multiply_cannon_metrocomm4       25290 15.6    0.080    0.089    2.830    7.219
 mp_irecv_dv                      76751 16.2    2.677    6.941    2.677    6.941
 calculate_dm_sparse                129  9.5    0.001    0.001    6.336    6.426
 fft3d_ps                          1301 14.7    3.011    3.094    6.146    6.210
 grid_collocate_task_list           129  9.7    5.323    5.710    5.323    5.710
 mp_alltoall_d11v                  2429 14.1    5.068    5.651    5.068    5.651
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.425    5.465
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    4.509    4.603
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.348    4.450
 potential_pw2rs                    129 12.3    0.023    0.024    4.407    4.417
 qs_energies_init_hamiltonians       11  5.9    0.001    0.002    4.401    4.402
 qs_ot_get_orbitals                 118 10.6    0.001    0.001    4.302    4.358
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="406", plot="h2o_256_md", label="(8n/3r/4t)", y=190.675000, yerr=0.000000
PlotPoint: name="407", plot="h2o_256_md_mem", label="(8n/3r/4t)", y=1082.727273, yerr=24.491523
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/19/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.865088E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               1960712       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3445.7
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               1.572782E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                  240672
 MPI messages size (bytes):
  total size                         1.331455E+12
  min size                           0.000000E+00
  max size                          52.428800E+06
  average size                       5.532237E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                1452                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                 132                  8650752
    131072 < size <=  4194304              113904              59718500352
   4194304 < size <= 16777216              104976             550376570880
  16777216 < size                           20208             721350092304
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast               14                     12.
 MP_Allreduce         8931                     51.
 MP_Alltoall          9654                 799394.
 MP_ISend            40068                2102572.
 MP_IRecv            40068                2101675.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  58346.
 MP_Allreduce        11057                   1167.
 MP_Sync                87
 MP_Alltoall          1712               18838186.
 MP_SendRecv          3840                 122880.
 MP_ISendRecv         3840                 122880.
 MP_Wait             16122
 MP_ISend            10680                 423556.
 MP_IRecv            10680                 423556.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.015    0.052  168.985  169.003
 qs_mol_dyn_low                       1  2.0    0.003    0.004  168.599  168.612
 qs_forces                           11  3.9    0.004    0.004  168.506  168.508
 qs_energies                         11  4.9    0.027    0.032  161.138  161.147
 scf_env_do_scf                      11  5.9    0.001    0.001  143.954  143.965
 velocity_verlet                     10  3.0    0.002    0.002  110.849  110.853
 scf_env_do_scf_inner_loop          117  6.6    0.003    0.008  108.834  108.836
 dbcsr_multiply_generic            2507 12.6    0.187    0.193   71.984   72.479
 qs_scf_new_mos                     117  7.6    0.001    0.001   71.775   71.878
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   71.774   71.877
 ot_scf_mini                        117  9.6    0.003    0.004   67.363   67.465
 multiply_cannon                   2507 13.6    0.558    0.578   53.540   56.501
 multiply_cannon_loop              2507 14.6    0.807    0.833   50.432   51.534
 ot_mini                            117 10.6    0.001    0.001   37.695   37.792
 init_scf_loop                       11  6.9    0.000    0.000   34.967   34.969
 mp_waitall_1                    125778 16.7   24.788   31.311   24.788   31.311
 prepare_preconditioner              11  7.9    0.000    0.000   31.131   31.151
 make_preconditioner                 11  8.9    0.000    0.000   31.131   31.151
 rebuild_ks_matrix                  128  8.3    0.001    0.001   29.169   29.317
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.018   29.169   29.316
 make_full_inverse_cholesky          11  9.9    0.012    0.027   28.957   29.215
 qs_ks_update_qs_env                128  7.6    0.001    0.001   26.613   26.754
 multiply_cannon_multrec          10028 15.6   10.308   15.845   17.879   22.412
 qs_ot_get_derivative               117 11.6    0.002    0.002   20.447   20.545
 multiply_cannon_metrocomm3       10028 15.6    0.024    0.026   12.904   20.447
 cp_fm_cholesky_invert               11 10.9   17.708   17.713   17.708   17.713
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   17.299   17.479
 apply_single                       128 13.6    0.001    0.001   17.299   17.478
 ot_diis_step                       117 11.6    0.020    0.020   17.177   17.178
 qs_ot_get_p                        128 10.4    0.001    0.001   16.967   17.069
 make_m2s                          5014 13.6    0.064    0.068   14.595   15.504
 make_images                       5014 14.6    2.151    2.583   14.286   15.189
 qs_rho_update_rho_low              128  7.7    0.001    0.001   14.233   14.278
 calculate_rho_elec                 128  8.7    0.253    0.264   14.232   14.278
 sum_up_and_integrate               128 10.3    0.002    0.002   14.192   14.245
 integrate_v_rspace                 128 11.3    0.004    0.006   14.132   14.184
 qs_ot_p2m_diag                      83 11.4    0.496    0.501   13.254   13.270
 cp_dbcsr_syevd                      83 12.4    0.005    0.006   12.083   12.084
 multiply_cannon_sync_h2d         10028 15.6   10.724   11.216   10.724   11.216
 init_scf_run                        11  5.9    0.000    0.001   10.269   10.269
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   10.269   10.269
 pw_transfer                       1547 11.6    0.085    0.092    9.870    9.902
 make_images_data                  5014 15.6    0.054    0.065    8.358    9.708
 fft_wrap_pw1pw2                   1291 12.7    0.010    0.010    9.648    9.689
 hybrid_alltoall_any               5200 16.5    0.840    3.769    8.221    9.364
 cp_fm_diag_elpa                     83 13.4    0.000    0.000    9.230    9.241
 cp_fm_diag_elpa_base                83 14.4    8.983    9.065    9.221    9.232
 fft_wrap_pw1pw2_140                523 13.2    1.677    1.714    8.544    8.589
 qs_ot_get_derivative_diag           77 12.4    0.002    0.003    8.029    8.100
 grid_integrate_task_list           128 12.3    7.797    8.032    7.797    8.032
 dbcsr_mm_accdrv_process          20762 16.1    3.297    4.543    7.225    7.907
 cp_fm_cholesky_decompose            22 10.9    7.707    7.798    7.707    7.798
 multiply_cannon_metrocomm1       10028 15.6    0.030    0.031    4.295    7.520
 density_rs2pw                      128  9.7    0.005    0.005    6.957    7.305
 wfi_extrapolate                     11  7.9    0.001    0.001    7.271    7.271
 calculate_dm_sparse                128  9.5    0.001    0.001    6.048    6.127
 fft3d_ps                          1291 14.7    3.148    3.262    6.096    6.121
 grid_collocate_task_list           128  9.7    5.571    5.824    5.571    5.824
 multiply_cannon_metrocomm4        7521 15.6    0.026    0.029    1.872    5.689
 dbcsr_complete_redistribute        395 12.7    2.112    2.186    5.283    5.660
 mp_irecv_dv                      28860 15.9    1.831    5.598    1.831    5.598
 qs_energies_init_hamiltonians       11  5.9    0.009    0.011    5.328    5.329
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    5.119    5.133
 mp_alltoall_d11v                  2415 14.1    4.597    4.802    4.597    4.802
 mp_allgather_i34                  2507 14.6    1.307    4.559    1.307    4.559
 potential_pw2rs                    128 12.3    0.026    0.026    4.376    4.384
 cp_dbcsr_sm_fm_multiply_core        37 10.5    0.000    0.000    4.021    4.070
 build_core_hamiltonian_matrix_      11  4.9    0.001    0.001    3.572    3.861
 copy_fm_to_dbcsr                   209 11.7    0.002    0.002    3.443    3.774
 qs_ot_get_orbitals                 117 10.6    0.001    0.001    3.637    3.692
 qs_ot_get_derivative_taylor         40 13.0    0.001    0.001    3.588    3.679
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    3.423    3.491
 qs_ks_update_qs_env_forces          11  4.9    0.000    0.000    3.459    3.472
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="408", plot="h2o_256_md", label="(8n/2r/6t)", y=169.003000, yerr=0.000000
PlotPoint: name="409", plot="h2o_256_md_mem", label="(8n/2r/6t)", y=1458.090909, yerr=33.255087
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/20/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops     9 x     9 x    32        1420239992832       0.0%      0.0%    100.0%
 flops    32 x    32 x    32        1943472701440       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        1972057190400       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        1977770336256       0.0%      0.0%    100.0%
 flops    22 x    22 x    32        2734287699968       0.0%      0.0%    100.0%
 flops    32 x    32 x     9        4416300122112       0.0%      0.0%    100.0%
 flops    32 x    32 x    22        5397700149248       0.0%      0.0%    100.0%
 flops     9 x    32 x    32        5443971710976       0.0%      0.0%    100.0%
 flops    22 x    32 x    32        6653743202304       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       11528891191296       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       15129160814592       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       15129160814592       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       19767995056128       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                        93.514751E+12       0.0%      0.0%    100.0%
 flops max/rank                     11.696233E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         6755938624       0.0%      0.0%    100.0%
 number of processed stacks               1964048       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0    3439.8
 marketing flops                   144.579337E+12
 -------------------------------------------------------------------------------
 # multiplications                           2507
 max memory usage/rank               3.124339E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                  100280
 MPI messages size (bytes):
  total size                         1.136195E+12
  min size                           0.000000E+00
  max size                         104.857600E+06
  average size                      11.330227E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 572                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                  44                  2883584
    131072 < size <=  4194304               45208              35089547264
   4194304 < size <= 16777216               44352             379752284160
  16777216 < size                           10104             721350092304
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             3992                  59279.
 MP_Allreduce        11057                   1504.
 MP_Sync                87
 MP_Alltoall          1712               36974159.
 MP_SendRecv          1792                 218624.
 MP_ISendRecv         1792                 218624.
 MP_Wait              9802
 MP_ISend             6408                1080322.
 MP_IRecv             6408                1080322.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.020    0.039  289.640  289.642
 qs_mol_dyn_low                       1  2.0    0.003    0.004  289.115  289.128
 qs_forces                           11  3.9    0.004    0.005  289.024  289.030
 qs_energies                         11  4.9    0.002    0.002  280.076  280.079
 scf_env_do_scf                      11  5.9    0.001    0.001  257.650  257.657
 velocity_verlet                     10  3.0    0.002    0.002  209.445  209.452
 scf_env_do_scf_inner_loop          117  6.6    0.004    0.009  134.282  134.284
 init_scf_loop                       11  6.9    0.000    0.000  123.098  123.102
 prepare_preconditioner              11  7.9    0.000    0.000  118.252  118.272
 make_preconditioner                 11  8.9    0.000    0.000  118.252  118.272
 make_full_inverse_cholesky          11  9.9    0.038    0.039   94.695  115.398
 qs_scf_new_mos                     117  7.6    0.001    0.001   89.532   89.649
 qs_scf_loop_do_ot                  117  8.6    0.001    0.001   89.531   89.648
 ot_scf_mini                        117  9.6    0.004    0.004   84.679   84.723
 dbcsr_multiply_generic            2507 12.6    0.216    0.226   81.729   82.271
 cp_fm_upper_to_full                105 14.8   52.566   75.038   52.566   75.038
 multiply_cannon                   2507 13.6    0.686    0.730   58.625   59.832
 multiply_cannon_loop              2507 14.6    1.043    1.077   54.594   56.010
 ot_mini                            117 10.6    0.001    0.001   44.451   44.520
 dbcsr_complete_redistribute        395 12.7    4.000    4.082   29.548   42.070
 copy_fm_to_dbcsr                   209 11.7    0.001    0.002   26.049   38.600
 transfer_fm_to_dbcsr                11  9.9    0.030    0.030   23.515   35.862
 rebuild_ks_matrix                  128  8.3    0.001    0.001   34.718   34.770
 qs_ks_build_kohn_sham_matrix       128  9.3    0.017    0.017   34.717   34.770
 mp_alltoall_i22                    716 14.1   21.268   33.963   21.268   33.963
 cp_fm_cholesky_invert               11 10.9   33.094   33.100   33.094   33.100
 qs_ks_update_qs_env                128  7.6    0.001    0.001   32.198   32.248
 mp_waitall_1                    103674 16.8   27.914   32.107   27.914   32.107
 qs_ot_get_p                        128 10.4    0.001    0.001   25.398   25.494
 qs_ot_get_derivative               117 11.6    0.002    0.002   24.447   24.489
 qs_ot_p2m_diag                      83 11.4    0.879    0.884   21.351   21.381
 multiply_cannon_metrocomm3       10028 15.6    0.025    0.026   18.689   20.141
 make_m2s                          5014 13.6    0.074    0.077   18.550   20.050
 ot_diis_step                       117 11.6    0.022    0.022   19.967   19.967
 make_images                       5014 14.6    3.056    3.264   18.066   19.568
 cp_dbcsr_syevd                      83 12.4    0.006    0.006   19.556   19.558
 apply_preconditioner_dbcsr         128 12.6    0.000    0.000   19.302   19.450
 apply_single                       128 13.6    0.001    0.001   19.302   19.450
 multiply_cannon_multrec          10028 15.6   10.242   12.078   18.007   18.083
 qs_rho_update_rho_low              128  7.7    0.001    0.001   17.299   17.328
 calculate_rho_elec                 128  8.7    0.478    0.478   17.298   17.327
 sum_up_and_integrate               128 10.3    0.002    0.002   16.430   16.515
 integrate_v_rspace                 128 11.3    0.004    0.004   16.367   16.451
 cp_fm_diag_elpa                     83 13.4    0.000    0.000   16.385   16.386
 cp_fm_diag_elpa_base                83 14.4   12.014   13.572   16.382   16.382
 multiply_cannon_sync_h2d         10028 15.6   14.311   14.359   14.311   14.359
 pw_transfer                       1547 11.6    0.095    0.096   13.159   13.171
 fft_wrap_pw1pw2                   1291 12.7    0.011    0.012   12.921   12.934
 hybrid_alltoall_any               5200 16.5    1.314    3.046   10.742   12.867
 make_images_data                  5014 15.6    0.063    0.068   10.538   12.718
 init_scf_run                        11  5.9    0.000    0.001   12.073   12.073
 scf_env_initial_rho_setup           11  6.9    0.001    0.001   12.072   12.073
 fft_wrap_pw1pw2_140                523 13.2    3.227    3.268   11.510   11.524
 qs_ot_get_derivative_diag           77 12.4    0.002    0.002    9.665    9.694
 dbcsr_mm_accdrv_process          20762 16.1    4.227    5.980    7.514    9.370
 wfi_extrapolate                     11  7.9    0.001    0.001    8.958    8.958
 cp_fm_cholesky_decompose            22 10.9    8.828    8.845    8.828    8.845
 grid_integrate_task_list           128 12.3    8.552    8.745    8.552    8.745
 density_rs2pw                      128  9.7    0.005    0.005    8.335    8.390
 qs_energies_init_hamiltonians       11  5.9    0.002    0.002    8.169    8.170
 fft3d_ps                          1291 14.7    3.980    3.995    7.370    7.435
 mp_alltoall_d11v                  2415 14.1    6.871    7.029    6.871    7.029
 calculate_dm_sparse                128  9.5    0.001    0.001    6.728    6.773
 grid_collocate_task_list           128  9.7    6.415    6.491    6.415    6.491
 cp_dbcsr_sm_fm_multiply             37  9.5    0.002    0.002    6.300    6.352
 copy_dbcsr_to_fm                   186 11.8    0.004    0.004    6.156    6.222
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="410", plot="h2o_256_md", label="(8n/1r/12t)", y=289.642000, yerr=0.000000
PlotPoint: name="411", plot="h2o_256_md_mem", label="(8n/1r/12t)", y=2862.181818, yerr=135.724198
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/21/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      2.766000E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                419739       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   22952.9
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               1.260769E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  458208
 MPI messages size (bytes):
  total size                         3.456111E+12
  min size                           0.000000E+00
  max size                          18.735064E+06
  average size                       7.542668E+06
 MPI breakdown and total messages size (bytes):
             size <=      128              112896                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                 224                  5687808
     32768 < size <=   131072               10528                813356544
    131072 < size <=  4194304               36422              76284728544
   4194304 < size <= 16777216              294266            3312457683808
  16777216 < size                            3872              66548597808
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 255646.
 MP_Allreduce         3139                   6114.
 MP_Sync                 4
 MP_Alltoall            54               27994378.
 MP_SendRecv           285                  19200.
 MP_ISendRecv          285                  19200.
 MP_Wait              1017
 MP_ISend              642                 197829.
 MP_IRecv              642                 197607.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.014    0.033   84.980   84.981
 qs_energies                          1  2.0    0.000    0.000   84.499   84.505
 ls_scf                               1  3.0    0.000    0.000   83.585   83.592
 dbcsr_multiply_generic             111  6.7    0.018    0.029   72.534   72.742
 multiply_cannon                    111  7.7    0.017    0.020   55.816   57.051
 multiply_cannon_loop               111  8.7    0.228    0.239   52.401   53.961
 ls_scf_main                          1  4.0    0.000    0.000   52.162   52.162
 density_matrix_trs4                  2  5.0    0.002    0.003   46.652   46.749
 ls_scf_init_scf                      1  4.0    0.000    0.000   28.386   28.388
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   27.259   27.316
 mp_waitall_1                     11031 10.9   22.476   25.239   22.476   25.239
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   25.080   25.099
 multiply_cannon_multrec           2664  9.7    8.131    8.805   15.536   17.249
 multiply_cannon_sync_h2d          2664  9.7   13.376   15.415   13.376   15.415
 make_m2s                           222  7.7    0.009    0.011   13.047   13.604
 make_images                        222  8.7    0.098    0.107   13.025   13.586
 multiply_cannon_metrocomm1        2664  9.7    0.010    0.011    9.690   11.852
 multiply_cannon_metrocomm3        2664  9.7    0.009    0.010    5.531    8.191
 make_images_data                   222  9.7    0.004    0.005    7.583    8.097
 dbcsr_mm_accdrv_process           4760 10.4    0.592    0.707    7.024    8.027
 hybrid_alltoall_any                227 10.6    0.215    1.830    6.548    7.658
 dbcsr_mm_accdrv_process_sort      4760 11.4    6.233    7.133    6.233    7.133
 calculate_norms                   4752  9.8    5.508    6.101    5.508    6.101
 apply_matrix_preconditioner          6  5.3    0.000    0.000    5.030    5.163
 mp_sum_l                           887  5.1    3.127    4.565    3.127    4.565
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    2.356    3.441
 multiply_cannon_metrocomm4        2442  9.7    0.012    0.014    2.069    3.436
 make_images_sizes                  222  9.7    0.000    0.000    0.726    3.431
 mp_alltoall_i44                    222 10.7    0.726    3.431    0.726    3.431
 mp_irecv_dv                       6231 10.9    2.053    3.409    2.053    3.409
 arnoldi_extremal                     4  6.8    0.000    0.000    3.256    3.291
 arnoldi_normal_ev                    4  7.8    0.005    0.010    3.256    3.290
 build_subspace                      16  8.4    0.009    0.013    3.163    3.166
 ls_scf_post                          1  4.0    0.000    0.000    3.036    3.043
 ls_scf_store_result                  1  5.0    0.000    0.000    2.854    2.898
 dbcsr_special_finalize             555  9.7    0.005    0.006    2.383    2.823
 dbcsr_merge_single_wm              555 10.7    0.462    0.595    2.375    2.814
 make_images_pack                   222  9.7    2.208    2.585    2.210    2.587
 dbcsr_matrix_vector_mult           304  9.0    0.006    0.014    2.330    2.570
 dbcsr_sort_data                    658 11.4    2.171    2.536    2.171    2.536
 dbcsr_matrix_vector_mult_local     304 10.0    2.070    2.458    2.072    2.460
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.278    2.378
 buffer_matrices_ensure_size        222  8.7    1.753    2.103    1.753    2.103
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.760    1.761
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.750    1.752
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    1.750    1.752
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="500", plot="h2o_32_nrep3_ls", label="(8n/12r/1t)", y=84.981000, yerr=0.000000
PlotPoint: name="501", plot="h2o_32_nrep3_ls_mem", label="(8n/12r/1t)", y=1132.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/22/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      5.588524E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                368848       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26119.8
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.175656E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                  106560
 MPI messages size (bytes):
  total size                         2.699093E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      25.329324E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               23040                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                3264                325830144
    131072 < size <=  4194304                5280               3328561104
   4194304 < size <= 16777216               12709             156766962056
  16777216 < size                           62267            2538670978840
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  10075.
 MP_Sync                 4
 MP_Alltoall            47               15335933.
 MP_SendRecv           141                  57600.
 MP_ISendRecv          141                  57600.
 MP_Wait               687
 MP_ISend              462                 414589.
 MP_IRecv              462                 413870.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.023    0.042   89.385   89.386
 qs_energies                          1  2.0    0.000    0.000   88.954   88.963
 ls_scf                               1  3.0    0.000    0.000   87.622   87.631
 dbcsr_multiply_generic             111  6.7    0.015    0.015   73.829   74.181
 multiply_cannon                    111  7.7    0.027    0.037   52.741   56.602
 ls_scf_main                          1  4.0    0.000    0.000   54.176   54.181
 multiply_cannon_loop               111  8.7    0.135    0.147   49.929   52.656
 density_matrix_trs4                  2  5.0    0.002    0.003   48.547   48.761
 ls_scf_init_scf                      1  4.0    0.000    0.000   29.740   29.742
 mp_waitall_1                      9105 10.9   20.511   29.736   20.511   29.736
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.572   28.655
 multiply_cannon_multrec           1332  9.7   13.375   17.007   22.686   27.436
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.218   26.231
 multiply_cannon_metrocomm3        1332  9.7    0.007    0.008   11.483   20.204
 make_m2s                           222  7.7    0.006    0.007   14.885   15.472
 make_images                        222  8.7    1.364    1.691   14.854   15.442
 dbcsr_mm_accdrv_process           4041 10.4    0.356    0.533    8.906   10.471
 dbcsr_mm_accdrv_process_sort      4041 11.4    8.423    9.937    8.423    9.937
 make_images_data                   222  9.7    0.004    0.005    8.578    9.452
 hybrid_alltoall_any                227 10.6    0.541    2.532    7.962    9.379
 multiply_cannon_metrocomm4        1221  9.7    0.007    0.009    3.200    7.713
 mp_irecv_dv                       3311 11.0    3.180    7.653    3.180    7.653
 mp_sum_l                           887  5.1    4.716    7.074    4.716    7.074
 calculate_norms                   2376  9.8    6.067    6.704    6.067    6.704
 multiply_cannon_sync_h2d          1332  9.7    4.902    6.237    4.902    6.237
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.547    5.676
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.867    5.080
 arnoldi_extremal                     4  6.8    0.000    0.000    4.618    4.636
 arnoldi_normal_ev                    4  7.8    0.001    0.005    4.618    4.635
 build_subspace                      16  8.4    0.014    0.021    4.364    4.368
 ls_scf_post                          1  4.0    0.000    0.000    3.705    3.714
 ls_scf_store_result                  1  5.0    0.000    0.000    3.410    3.530
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.164    3.388
 dbcsr_matrix_vector_mult_local     304 10.0    2.782    3.258    2.784    3.259
 mp_allgather_i34                   111  8.7    0.809    2.985    0.809    2.985
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.586    2.666
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.004    1.098    2.535
 dbcsr_data_new                    4174 10.1    2.111    2.398    2.111    2.398
 dbcsr_sort_data                    436 11.2    1.839    2.126    1.839    2.126
 make_images_pack                   222  9.7    1.817    2.117    1.820    2.119
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.842    1.845
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.830    1.832
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    1.830    1.832
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="502", plot="h2o_32_nrep3_ls", label="(8n/6r/2t)", y=89.386000, yerr=0.000000
PlotPoint: name="503", plot="h2o_32_nrep3_ls_mem", label="(8n/6r/2t)", y=1779.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/23/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                      8.404608E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                353133       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   27282.1
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               2.852221E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                   46176
 MPI messages size (bytes):
  total size                         1.924064E+12
  min size                           0.000000E+00
  max size                         108.059888E+06
  average size                      41.668048E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                9984                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                3328               1170063360
   4194304 < size <= 16777216                1870              19378539600
  16777216 < size                           30994            1903514987232
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265448.
 MP_Allreduce         3138                  10896.
 MP_Sync                 4
 MP_Alltoall            47               23526250.
 MP_SendRecv            93                  57600.
 MP_ISendRecv           93                  57600.
 MP_Wait               639
 MP_ISend              462                 560046.
 MP_IRecv              462                 560662.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.027    0.049   92.663   92.663
 qs_energies                          1  2.0    0.000    0.000   92.115   92.120
 ls_scf                               1  3.0    0.000    0.000   90.712   90.717
 dbcsr_multiply_generic             111  6.7    0.016    0.021   75.315   75.578
 ls_scf_main                          1  4.0    0.000    0.000   56.649   56.653
 multiply_cannon                    111  7.7    0.029    0.075   51.971   55.617
 multiply_cannon_loop               111  8.7    0.117    0.130   49.216   53.754
 density_matrix_trs4                  2  5.0    0.002    0.003   50.661   50.891
 mp_waitall_1                      7281 11.0   23.505   33.870   23.505   33.870
 ls_scf_init_scf                      1  4.0    0.000    0.000   30.387   30.390
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   29.174   29.269
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   26.836   26.849
 multiply_cannon_multrec            888  9.7   12.651   15.569   21.244   24.759
 multiply_cannon_metrocomm3         888  9.7    0.004    0.004   11.086   23.432
 make_m2s                           222  7.7    0.006    0.007   16.160   16.936
 make_images                        222  8.7    1.581    1.857   16.121   16.896
 make_images_data                   222  9.7    0.004    0.005    9.535   10.518
 hybrid_alltoall_any                227 10.6    0.639    2.942    9.086   10.182
 dbcsr_mm_accdrv_process           3754 10.4    0.300    0.490    8.121    9.376
 mp_sum_l                           887  5.1    5.484    8.903    5.484    8.903
 dbcsr_mm_accdrv_process_sort      3754 11.4    7.686    8.886    7.686    8.886
 multiply_cannon_sync_h2d           888  9.7    6.077    7.481    6.077    7.481
 multiply_cannon_metrocomm4         777  9.7    0.004    0.005    2.496    7.043
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    4.296    7.004
 mp_irecv_dv                       2335 11.1    2.479    6.998    2.479    6.998
 multiply_cannon_metrocomm1         888  9.7    0.003    0.003    3.524    6.960
 arnoldi_extremal                     4  6.8    0.000    0.000    5.160    5.175
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.160    5.175
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.813    5.097
 build_subspace                      16  8.4    0.014    0.020    4.862    4.869
 calculate_norms                   1584  9.8    4.327    4.750    4.327    4.750
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.021    3.507    3.834
 mp_allgather_i34                   111  8.7    0.880    3.716    0.880    3.716
 ls_scf_post                          1  4.0    0.000    0.000    3.677    3.681
 dbcsr_matrix_vector_mult_local     304 10.0    3.072    3.661    3.074    3.663
 ls_scf_store_result                  1  5.0    0.000    0.000    3.435    3.504
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    2.881    3.012
 dbcsr_data_new                    4116  9.9    2.113    2.457    2.113    2.457
 make_images_sizes                  222  9.7    0.000    0.000    0.811    2.371
 mp_alltoall_i44                    222 10.7    0.811    2.370    0.811    2.370
 dbcsr_sort_data                    325 11.1    1.863    2.106    1.863    2.106
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    1.887    1.889
 make_images_pack                   222  9.7    1.620    1.880    1.622    1.883
 rebuild_ks_matrix                    3  7.3    0.000    0.000    1.870    1.871
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    1.870    1.871
 dbcsr_finalize                     304  7.8    0.026    0.032    1.603    1.861
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="504", plot="h2o_32_nrep3_ls", label="(8n/4r/3t)", y=92.663000, yerr=0.000000
PlotPoint: name="505", plot="h2o_32_nrep3_ls_mem", label="(8n/4r/3t)", y=2216.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/24/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     10.747127E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                369794       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   26053.0
 marketing flops                     1.742116E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               3.362456E+09
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged                   50616
 MPI messages size (bytes):
  total size                         1.536549E+12
  min size                           0.000000E+00
  max size                          72.286792E+06
  average size                      30.356986E+06
 MPI breakdown and total messages size (bytes):
             size <=      128               10368                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                1056                104411904
    131072 < size <=  4194304                3168                831638784
   4194304 < size <= 16777216                3103              33613273640
  16777216 < size                           32921            1501999894888
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 266673.
 MP_Allreduce         3138                  13030.
 MP_Sync                 4
 MP_Alltoall            47               30278988.
 MP_SendRecv            69                  86400.
 MP_ISendRecv           69                  86400.
 MP_Wait               531
 MP_ISend              378                 823502.
 MP_IRecv              378                 823753.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.030    0.047   96.816   96.817
 qs_energies                          1  2.0    0.000    0.000   96.220   96.225
 ls_scf                               1  3.0    0.000    0.000   94.564   94.569
 dbcsr_multiply_generic             111  6.7    0.016    0.017   78.220   78.466
 ls_scf_main                          1  4.0    0.000    0.000   58.838   58.839
 multiply_cannon                    111  7.7    0.042    0.081   51.667   56.184
 density_matrix_trs4                  2  5.0    0.002    0.003   52.690   52.834
 multiply_cannon_loop               111  8.7    0.153    0.168   46.576   49.278
 ls_scf_init_scf                      1  4.0    0.000    0.000   32.484   32.487
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   31.269   31.346
 mp_waitall_1                      6369 11.0   22.515   29.086   22.515   29.086
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   28.809   28.821
 multiply_cannon_multrec           1332  9.7   14.194   17.488   22.122   24.858
 make_m2s                           222  7.7    0.007    0.008   20.978   22.457
 make_images                        222  8.7    3.131    3.595   20.928   22.409
 multiply_cannon_metrocomm3        1332  9.7    0.003    0.004    9.137   16.644
 make_images_data                   222  9.7    0.004    0.004   11.715   13.297
 hybrid_alltoall_any                227 10.6    0.798    3.775   11.046   13.132
 dbcsr_mm_accdrv_process           3641 10.4    0.297    0.475    7.573    9.104
 dbcsr_mm_accdrv_process_sort      3641 11.4    7.131    8.606    7.131    8.606
 mp_sum_l                           887  5.1    4.002    7.615    4.002    7.615
 multiply_cannon_metrocomm4        1110  9.7    0.005    0.007    2.084    6.063
 multiply_cannon_sync_h2d          1332  9.7    5.472    6.005    5.472    6.005
 mp_irecv_dv                       3229 10.9    2.059    5.986    2.059    5.986
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    3.046    5.961
 arnoldi_extremal                     4  6.8    0.000    0.000    5.309    5.321
 arnoldi_normal_ev                    4  7.8    0.001    0.005    5.309    5.321
 multiply_cannon_metrocomm1        1332  9.7    0.003    0.003    2.494    5.066
 build_subspace                      16  8.4    0.014    0.021    4.967    4.977
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.565    4.746
 mp_allgather_i34                   111  8.7    2.209    4.729    2.209    4.729
 calculate_norms                   2376  9.8    4.167    4.513    4.167    4.513
 dbcsr_matrix_vector_mult           304  9.0    0.010    0.022    3.650    3.969
 dbcsr_matrix_vector_mult_local     304 10.0    3.250    3.747    3.252    3.749
 dbcsr_sort_data                    658 11.4    3.089    3.454    3.089    3.454
 ls_scf_post                          1  4.0    0.000    0.000    3.241    3.248
 dbcsr_special_finalize             555  9.7    0.006    0.007    2.847    3.222
 dbcsr_merge_single_wm              555 10.7    0.544    0.665    2.838    3.214
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.022    3.092
 ls_scf_store_result                  1  5.0    0.000    0.000    2.985    3.045
 dbcsr_data_release               10477 10.7    1.566    2.380    1.566    2.380
 dbcsr_finalize                     304  7.8    0.049    0.061    1.792    1.979
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="506", plot="h2o_32_nrep3_ls", label="(8n/3r/4t)", y=96.817000, yerr=0.000000
PlotPoint: name="507", plot="h2o_32_nrep3_ls_mem", label="(8n/3r/4t)", y=2762.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/25/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     15.383312E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                336818       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28603.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               4.751692E+09
 # max total images/rank                        1
 # max 3D layers                                1
 # MPI messages exchanged                   10656
 MPI messages size (bytes):
  total size                         1.149035E+12
  min size                           0.000000E+00
  max size                         203.538048E+06
  average size                     107.829832E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                2304                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 768                702038016
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            7584            1148332810224
 -------------------------------------------------------------------------------
 -                                                                             -
 -                      DBCSR MESSAGE PASSING PERFORMANCE                      -
 -                                                                             -
 -------------------------------------------------------------------------------
 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Bcast                2                     12.
 MP_Allreduce          705                    128.
 MP_Alltoall           310               12920694.
 MP_ISend             1776               40180424.
 MP_IRecv             1776               40465030.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 265536.
 MP_Allreduce         3129                  15263.
 MP_Sync                 4
 MP_Alltoall            47               46208988.
 MP_SendRecv            45                 115200.
 MP_ISendRecv           45                 115200.
 MP_Wait               528
 MP_ISend              420                 924980.
 MP_IRecv              420                 924528.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.038    0.057   91.598   91.598
 qs_energies                          1  2.0    0.000    0.000   90.849   90.861
 ls_scf                               1  3.0    0.000    0.000   88.902   88.915
 dbcsr_multiply_generic             111  6.7    0.017    0.018   70.385   70.547
 ls_scf_main                          1  4.0    0.000    0.000   56.193   56.193
 multiply_cannon                    111  7.7    0.081    0.141   52.388   55.265
 multiply_cannon_loop               111  8.7    0.088    0.093   49.806   50.881
 density_matrix_trs4                  2  5.0    0.002    0.003   49.192   49.293
 ls_scf_init_scf                      1  4.0    0.000    0.000   29.324   29.327
 mp_waitall_1                      5436 11.0   24.010   28.623   24.010   28.623
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   28.065   28.092
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   25.984   25.995
 multiply_cannon_multrec            444  9.7   13.758   16.494   20.861   22.337
 multiply_cannon_metrocomm1         444  9.7    0.002    0.002   10.435   15.073
 make_m2s                           222  7.7    0.005    0.005   13.368   14.313
 make_images                        222  8.7    2.039    2.480   13.301   14.246
 multiply_cannon_metrocomm3         444  9.7    0.001    0.002    6.279   13.986
 make_images_data                   222  9.7    0.003    0.004    8.114    9.670
 hybrid_alltoall_any                227 10.6    0.803    3.841    8.047    9.457
 dbcsr_mm_accdrv_process           3003 10.4    0.363    0.408    6.792    7.897
 multiply_cannon_sync_h2d           444  9.7    6.777    7.737    6.777    7.737
 dbcsr_mm_accdrv_process_sort      3003 11.4    6.429    7.502    6.429    7.502
 arnoldi_extremal                     4  6.8    0.000    0.000    5.897    5.911
 arnoldi_normal_ev                    4  7.8    0.002    0.005    5.897    5.911
 build_subspace                      16  8.4    0.015    0.020    5.499    5.507
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.415    4.698
 dbcsr_matrix_vector_mult           304  9.0    0.011    0.021    4.238    4.453
 dbcsr_matrix_vector_mult_local     304 10.0    3.777    4.268    3.779    4.270
 mp_sum_l                           887  5.1    2.619    4.179    2.619    4.179
 multiply_cannon_metrocomm4         333  9.7    0.001    0.002    1.500    3.780
 mp_irecv_dv                       1241 11.2    1.485    3.737    1.485    3.737
 calculate_norms                    792  9.8    3.617    3.708    3.617    3.708
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    3.551    3.618
 mp_allgather_i34                   111  8.7    1.157    3.525    1.157    3.525
 ls_scf_post                          1  4.0    0.000    0.000    3.385    3.397
 dbcsr_multiply_generic_mpsum_f      86  7.8    0.000    0.000    1.830    3.312
 make_images_sizes                  222  9.7    0.000    0.000    0.868    3.227
 mp_alltoall_i44                    222 10.7    0.868    3.227    0.868    3.227
 ls_scf_store_result                  1  5.0    0.000    0.000    3.172    3.203
 dbcsr_finalize                     304  7.8    0.062    0.077    2.204    2.303
 dbcsr_data_new                    4608  9.7    1.786    2.215    1.786    2.215
 dbcsr_merge_all                    275  8.9    0.480    0.526    2.061    2.134
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.046    2.046
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.013    2.014
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.001    2.013    2.014
 qs_energies_init_hamiltonians        1  3.0    0.001    0.002    1.931    1.931
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="508", plot="h2o_32_nrep3_ls", label="(8n/2r/6t)", y=91.598000, yerr=0.000000
PlotPoint: name="509", plot="h2o_32_nrep3_ls_mem", label="(8n/2r/6t)", y=3735.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/26/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    23 x    23 x    23      234439235724792       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       234.439236E+12       0.0%      0.0%    100.0%
 flops max/rank                     30.358840E+12       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                         9634225188       0.0%      0.0%    100.0%
 number of processed stacks                339931       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0   28341.7
 marketing flops                     1.742118E+15
 -------------------------------------------------------------------------------
 # multiplications                            111
 max memory usage/rank               8.836071E+09
 # max total images/rank                        2
 # max 3D layers                                1
 # MPI messages exchanged                    4440
 MPI messages size (bytes):
  total size                       770.525954E+09
  min size                           0.000000E+00
  max size                         399.069120E+06
  average size                     173.541888E+06
 MPI breakdown and total messages size (bytes):
             size <=      128                 640                        0
       128 < size <=     8192                   0                        0
      8192 < size <=    32768                   0                        0
     32768 < size <=   131072                   0                        0
    131072 < size <=  4194304                 640                468025344
   4194304 < size <= 16777216                   0                        0
  16777216 < size                            3160             770057961712
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             1026                 284089.
 MP_Allreduce         3123                  21388.
 MP_Sync                 4
 MP_Alltoall            47               88727262.
 MP_SendRecv            42                 732600.
 MP_ISendRecv           42                 732600.
 MP_Wait               267
 MP_ISend              180                3337386.
 MP_IRecv              180                3339494.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.072    0.089  105.614  105.614
 qs_energies                          1  2.0    0.000    0.000  104.192  104.200
 ls_scf                               1  3.0    0.000    0.000  101.215  101.222
 dbcsr_multiply_generic             111  6.7    0.023    0.026   74.678   74.830
 ls_scf_main                          1  4.0    0.000    0.000   63.826   63.826
 density_matrix_trs4                  2  5.0    0.002    0.003   54.795   54.853
 multiply_cannon                    111  7.7    0.141    0.194   48.249   50.372
 multiply_cannon_loop               111  8.7    0.097    0.100   45.295   45.863
 ls_scf_init_scf                      1  4.0    0.000    0.000   33.600   33.601
 ls_scf_init_matrix_S                 1  5.0    0.000    0.000   32.040   32.063
 matrix_sqrt_Newton_Schulz            2  6.5    0.001    0.001   29.352   29.361
 mp_waitall_1                      4527 11.1   21.800   25.256   21.800   25.256
 make_m2s                           222  7.7    0.005    0.005   22.703   23.768
 make_images                        222  8.7    3.565    3.859   22.595   23.658
 multiply_cannon_multrec            444  9.7   17.820   18.467   22.482   23.120
 hybrid_alltoall_any                227 10.6    1.653    3.640   12.892   15.812
 make_images_data                   222  9.7    0.003    0.004   13.120   15.186
 multiply_cannon_metrocomm3         444  9.7    0.001    0.001   10.116   10.503
 multiply_cannon_sync_h2d           444  9.7    8.791    8.856    8.791    8.856
 arnoldi_extremal                     4  6.8    0.000    0.000    7.502    7.514
 arnoldi_normal_ev                    4  7.8    0.003    0.009    7.502    7.514
 build_subspace                      16  8.4    0.026    0.036    6.953    6.964
 dbcsr_matrix_vector_mult           304  9.0    0.017    0.034    5.605    5.771
 dbcsr_matrix_vector_mult_local     304 10.0    5.167    5.487    5.170    5.489
 ls_scf_dm_to_ks                      2  5.0    0.000    0.000    5.186    5.283
 apply_matrix_preconditioner          6  5.3    0.000    0.000    4.826    5.084
 dbcsr_mm_accdrv_process           1814 10.4    0.291    0.355    4.475    4.598
 dbcsr_mm_accdrv_process_sort      1814 11.4    4.138    4.268    4.138    4.268
 ls_scf_post                          1  4.0    0.000    0.000    3.789    3.797
 ls_scf_store_result                  1  5.0    0.000    0.000    3.511    3.549
 make_images_sizes                  222  9.7    0.000    0.000    1.468    3.529
 mp_alltoall_i44                    222 10.7    1.468    3.529    1.468    3.529
 mp_allgather_i34                   111  8.7    1.071    3.399    1.071    3.399
 calculate_norms                    792  9.8    3.231    3.268    3.231    3.268
 dbcsr_finalize                     304  7.8    0.082    0.089    3.073    3.157
 qs_energies_init_hamiltonians        1  3.0    0.001    0.002    2.947    2.947
 dbcsr_merge_all                    275  8.9    0.884    0.916    2.858    2.937
 dbcsr_complete_redistribute          5  7.6    1.443    1.484    2.755    2.878
 matrix_ls_to_qs                      2  6.0    0.000    0.000    2.402    2.534
 dbcsr_sort_data                    325 11.1    2.437    2.490    2.437    2.490
 qs_ks_update_qs_env                  3  6.3    0.000    0.000    2.465    2.467
 rebuild_ks_matrix                    3  7.3    0.000    0.000    2.399    2.401
 qs_ks_build_kohn_sham_matrix         3  8.3    0.000    0.000    2.399    2.401
 dbcsr_data_new                    6591  9.6    1.859    2.370    1.859    2.370
 dbcsr_new_transposed                 4  7.5    0.243    0.253    2.281    2.298
 dbcsr_frobenius_norm                74  6.6    2.059    2.138    2.205    2.238
 dbcsr_add_d                        103  6.2    0.000    0.000    2.125    2.202
 dbcsr_add_anytype                  103  7.2    0.859    0.890    2.124    2.202
 dbcsr_data_release               12724 10.6    1.971    2.175    1.971    2.175
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="510", plot="h2o_32_nrep3_ls", label="(8n/1r/12t)", y=105.614000, yerr=0.000000
PlotPoint: name="511", plot="h2o_32_nrep3_ls_mem", label="(8n/1r/12t)", y=6944.000000, yerr=0.000000
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


 ~~~~~~~~~ RESULT ~~~~~~~~~
RESULT file: /scratch/snx3000/mkrack/rt/../rt/CRAY-XC50-gnu/183bfa028a10482cb2ce2952b7487d62cedfbee6_performance_tests/27/result.log


 @@@@@@@@@@ Run number: 1 @@@@@@@@@@

 -------------------------------------------------------------------------------
 -                                                                             -
 -                                DBCSR STATISTICS                             -
 -                                                                             -
 -------------------------------------------------------------------------------
 COUNTER                                    TOTAL       BLAS       SMM       ACC
 flops    32 x    32 x    32        7009386627072       0.0%      0.0%    100.0%
 flops     9 x     9 x    32        7335108845568       0.0%      0.0%    100.0%
 flops     9 x    22 x    32        9866241589248       0.0%      0.0%    100.0%
 flops    22 x     9 x    32        9884108906496       0.0%      0.0%    100.0%
 flops    22 x    22 x    32       13354440523776       0.0%      0.0%    100.0%
 flops    32 x    32 x     9       20607185977344       0.0%      0.0%    100.0%
 flops    32 x    32 x    22       25186560638976       0.0%      0.0%    100.0%
 flops     9 x    32 x    32       28458319085568       0.0%      0.0%    100.0%
 flops    22 x    32 x    32       34782389993472       0.0%      0.0%    100.0%
 flops     9 x    32 x     9       42881542373376       0.0%      0.0%    100.0%
 flops    22 x    32 x     9       55680402235392       0.0%      0.0%    100.0%
 flops     9 x    32 x    22       55680402235392       0.0%      0.0%    100.0%
 flops    22 x    32 x    22       72328573419520       0.0%      0.0%    100.0%
 flops inhomo. stacks                           0       0.0%      0.0%      0.0%
 flops total                       383.054662E+12       0.0%      0.0%    100.0%
 flops max/rank                    733.641090E+09       0.0%      0.0%    100.0%
 matmuls inhomo. stacks                         0       0.0%      0.0%      0.0%
 matmuls total                        26899403712       0.0%      0.0%    100.0%
 number of processed stacks             118860288       0.0%      0.0%    100.0%
 average stack size                                     0.0       0.0     226.3
 marketing flops                   780.439111E+12
 -------------------------------------------------------------------------------
 # multiplications                           1445
 max memory usage/rank             593.256448E+06
 # max total images/rank                        3
 # max 3D layers                                1
 # MPI messages exchanged               102097920
 MPI messages size (bytes):
  total size                        37.227590E+12
  min size                           0.000000E+00
  max size                           4.551360E+06
  average size                     364.626312E+03
 MPI breakdown and total messages size (bytes):
             size <=      128              731472                        0
       128 < size <=     8192            11922720              97670922240
      8192 < size <=    32768            24718992             614677610496
     32768 < size <=   131072            20000256            1970081366016
    131072 < size <=  4194304            42515668           24886801223040
   4194304 < size <= 16777216             2208812            9656099886720
  16777216 < size                               0                        0
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                         MESSAGE PASSING PERFORMANCE                         -
 -                                                                             -
 -------------------------------------------------------------------------------

 ROUTINE             CALLS      AVE VOLUME [Bytes]
 MP_Group                4
 MP_Bcast             4640                  78072.
 MP_Allreduce        13232                   2081.
 MP_Sync              1064
 MP_Alltoall          2588                4222134.
 MP_SendRecv        168740                  11136.
 MP_ISendRecv        92040                  11136.
 MP_Wait            102830
 MP_comm_split          40
 MP_ISend            26090                  85106.
 MP_IRecv            37890                  59644.
 -------------------------------------------------------------------------------
 -------------------------------------------------------------------------------
 -                                                                             -
 -                                T I M I N G                                  -
 -                                                                             -
 -------------------------------------------------------------------------------
 SUBROUTINE                       CALLS  ASD         SELF TIME        TOTAL TIME
                                MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM
 CP2K                                 1  1.0    0.042    0.116  222.713  222.715
 qs_mol_dyn_low                       1  2.0    0.005    0.017  221.461  221.513
 qs_forces                            5  3.8    0.006    0.058  221.323  221.338
 qs_energies                          5  4.8    0.003    0.031  218.275  218.301
 scf_env_do_scf                       5  5.8    0.001    0.009  203.176  203.179
 scf_env_do_scf_inner_loop          105  6.6    0.002    0.006  176.963  176.968
 qs_scf_new_mos                     105  7.6    0.000    0.001  138.411  138.569
 qs_scf_loop_do_ot                  105  8.6    0.001    0.001  138.411  138.569
 dbcsr_multiply_generic            1445 12.2    0.129    0.139  132.397  132.849
 ot_scf_mini                        105  9.6    0.003    0.004  128.533  128.698
 multiply_cannon                   1445 13.2    0.275    0.284  114.044  116.275
 multiply_cannon_loop              1445 14.2    2.844    3.026  112.458  113.858
 velocity_verlet                      4  3.0    0.004    0.018  104.656  104.658
 ot_mini                            105 10.6    0.001    0.003   59.253   59.371
 multiply_cannon_multrec          69360 15.2   29.729   34.307   39.707   44.621
 mp_waitall_1                    488190 16.1   33.717   40.652   33.717   40.652
 qs_ot_get_p                        112 10.4    0.001    0.003   39.529   39.798
 qs_ot_get_derivative                55 11.6    0.001    0.001   37.621   37.739
 multiply_cannon_sync_h2d         69360 15.2   29.218   33.574   29.218   33.574
 multiply_cannon_metrocomm3       69360 15.2    0.200    0.211   25.251   32.262
 qs_ot_p2m_diag                      40 11.0    0.020    0.033   28.659   28.724
 rebuild_ks_matrix                  110  8.4    0.000    0.000   28.315   28.478
 qs_ks_build_kohn_sham_matrix       110  9.4    0.013    0.040   28.315   28.478
 init_scf_loop                        7  6.6    0.000    0.005   26.184   26.185
 qs_ks_update_qs_env                112  7.6    0.001    0.001   25.875   26.029
 cp_dbcsr_syevd                      40 12.0    0.002    0.003   25.464   25.465
 apply_preconditioner_dbcsr          62 12.6    0.000    0.001   22.919   23.144
 apply_single                        62 13.6    0.000    0.000   22.919   23.144
 prepare_preconditioner               7  7.6    0.000    0.000   21.413   21.441
 make_preconditioner                  7  8.6    0.000    0.003   21.413   21.441
 ot_new_cg_direction                 55 11.6    0.001    0.002   20.947   20.947
 cp_fm_syevd                         40 13.0    0.000    0.002   20.508   20.659
 qs_rho_update_rho_low              110  7.6    0.001    0.001   15.944   16.330
 calculate_rho_elec                 110  8.6    0.030    0.032   15.943   16.329
 cp_fm_redistribute_end              40 14.0    7.954   15.870    7.958   15.872
 cp_fm_syevd_base                    40 14.0    7.908   15.827    7.908   15.827
 qs_ot_get_orbitals                 105 10.6    0.001    0.001   14.613   14.771
 make_full_inverse_cholesky           7  9.6    0.000    0.001   14.390   14.458
 qs_ot_get_derivative_taylor         37 12.8    0.001    0.001   13.607   13.722
 mp_sum_l                          4764 12.2   11.827   12.847   11.827   12.847
 init_scf_run                         5  5.8    0.000    0.001   12.430   12.431
 scf_env_initial_rho_setup            5  6.8    0.002    0.007   12.430   12.431
 calculate_dm_sparse                110  9.5    0.000    0.001   11.499   11.652
 pw_transfer                       1645 12.4    0.079    0.095   11.279   11.452
 fft_wrap_pw1pw2                   1425 13.5    0.012    0.014   11.141   11.318
 density_rs2pw                      110  9.6    0.005    0.006   10.353   10.926
 dbcsr_mm_accdrv_process         154766 15.8    6.267    6.486    9.846   10.661
 qs_vxc_create                      110 10.4    0.002    0.008   10.150   10.184
 cp_fm_cholesky_invert                7 10.6   10.161   10.168   10.161   10.168
 qs_ot_get_derivative_diag           18 12.0    0.000    0.001   10.082   10.158
 fft_wrap_pw1pw2_240                915 15.0    0.839    0.917    9.782   10.000
 check_diag                          80 13.5    8.595    8.886    9.274    9.424
 acc_transpose_blocks             69360 15.2    0.359    0.373    7.976    8.376
 sum_up_and_integrate                60 10.3    0.001    0.003    8.218    8.251
 integrate_v_rspace                  60 11.3    0.001    0.002    8.201    8.234
 fft3d_pb                           915 16.0    2.402    2.576    7.952    8.121
 transfer_rs2pw                     445 10.6    0.007    0.008    7.219    7.787
 cp_dbcsr_sm_fm_multiply             15  9.3    0.001    0.002    7.697    7.713
 calculate_first_density_matrix       1  7.0    0.000    0.004    7.667    7.682
 xc_rho_set_and_dset_create         110 12.4    0.076    0.097    7.168    7.425
 cp_dbcsr_sm_fm_multiply_core        15 10.3    0.000    0.000    7.303    7.350
 multiply_cannon_metrocomm1       69360 15.2    0.093    0.100    4.372    7.020
 make_full_single_inverse             7  9.6    0.001    0.004    6.760    6.789
 xc_vxc_pw_create                    60 11.3    0.039    0.049    6.751    6.784
 make_m2s                          2890 13.2    0.078    0.089    6.057    6.611
 make_images                       2890 14.2    0.236    0.257    5.949    6.502
 xc_pw_derive                       510 13.4    0.006    0.007    5.935    5.999
 acc_transpose_blocks_kernels     69360 16.2    0.837    0.886    5.214    5.491
 mp_alltoall_z22v                  2340 17.7    4.918    5.231    4.918    5.231
 mp_waitany                        7680 13.5    4.511    5.097    4.511    5.097
 jit_kernel_transpose                 5 15.0    4.378    4.619    4.378    4.619
 potential_pw2rs                     60 12.3    0.002    0.005    4.511    4.549
 multiply_cannon_metrocomm4       67915 15.2    0.186    0.201    2.007    4.535
 -------------------------------------------------------------------------------
 ~ ~ ~ ~  DATA POINTS  ~ ~ ~ ~
PlotPoint: name="601", plot="h2o_512_md", label="(64n/12r/1t)", y=222.715000, yerr=0.000000
PlotPoint: name="602", plot="h2o_512_md_mem", label="(64n/12r/1t)", y=563.800000, yerr=1.833030
 ~ ~ ~ ~ END DATA POINTS ~ ~ ~
 ~~~~~~ END RESULT ~~~~~~~~


========= END RESULTS ===========

CommitSHA: 183bfa028a10482cb2ce2952b7487d62cedfbee6
Summary: empty
Status: OK