LCOV - code coverage report
Current view: top level - src - library_tests.F (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:34ef472) Lines: 626 705 88.8 %
Date: 2024-04-26 08:30:29 Functions: 11 11 100.0 %

          Line data    Source code
       1             : !--------------------------------------------------------------------------------------------------!
       2             : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3             : !   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
       4             : !                                                                                                  !
       5             : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6             : !--------------------------------------------------------------------------------------------------!
       7             : 
       8             : ! **************************************************************************************************
       9             : !> \brief Performance tests for basic tasks like matrix multiplies, copy, fft.
      10             : !> \par History
      11             : !>      30-Nov-2000 (JGH) added input
      12             : !>      02-Jan-2001 (JGH) Parallel FFT
      13             : !>      28-Feb-2002 (JGH) Clebsch-Gordon Coefficients
      14             : !>      06-Jun-2003 (JGH) Real space grid test
      15             : !>      Eigensolver test (29.08.05,MK)
      16             : !> \author JGH  6-NOV-2000
      17             : ! **************************************************************************************************
      18             : MODULE library_tests
      19             : 
      20             :    USE ai_coulomb_test,                 ONLY: eri_test
      21             :    USE cell_methods,                    ONLY: cell_create,&
      22             :                                               init_cell
      23             :    USE cell_types,                      ONLY: cell_release,&
      24             :                                               cell_type
      25             :    USE cg_test,                         ONLY: clebsch_gordon_test
      26             :    USE cp_blacs_env,                    ONLY: cp_blacs_env_create,&
      27             :                                               cp_blacs_env_release,&
      28             :                                               cp_blacs_env_type
      29             :    USE cp_eri_mme_interface,            ONLY: cp_eri_mme_perf_acc_test
      30             :    USE cp_files,                        ONLY: close_file,&
      31             :                                               open_file
      32             :    USE cp_fm_basic_linalg,              ONLY: cp_fm_gemm
      33             :    USE cp_fm_diag,                      ONLY: cp_fm_syevd,&
      34             :                                               cp_fm_syevx
      35             :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      36             :                                               cp_fm_struct_get,&
      37             :                                               cp_fm_struct_release,&
      38             :                                               cp_fm_struct_type
      39             :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      40             :                                               cp_fm_pilaenv,&
      41             :                                               cp_fm_release,&
      42             :                                               cp_fm_set_all,&
      43             :                                               cp_fm_set_submatrix,&
      44             :                                               cp_fm_to_fm,&
      45             :                                               cp_fm_type
      46             :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      47             :                                               cp_logger_type
      48             :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      49             :                                               cp_print_key_unit_nr
      50             :    USE cp_realspace_grid_init,          ONLY: init_input_type
      51             :    USE dbcsr_api,                       ONLY: dbcsr_reset_randmat_seed,&
      52             :                                               dbcsr_run_tests
      53             :    USE dbm_tests,                       ONLY: dbm_run_tests
      54             :    USE fft_tools,                       ONLY: BWFFT,&
      55             :                                               FFT_RADIX_CLOSEST,&
      56             :                                               FWFFT,&
      57             :                                               fft3d,&
      58             :                                               fft_radix_operations,&
      59             :                                               finalize_fft,&
      60             :                                               init_fft
      61             :    USE global_types,                    ONLY: global_environment_type
      62             :    USE input_constants,                 ONLY: do_diag_syevd,&
      63             :                                               do_diag_syevx,&
      64             :                                               do_mat_random,&
      65             :                                               do_mat_read,&
      66             :                                               do_pwgrid_ns_fullspace,&
      67             :                                               do_pwgrid_ns_halfspace,&
      68             :                                               do_pwgrid_spherical
      69             :    USE input_section_types,             ONLY: section_vals_get,&
      70             :                                               section_vals_get_subs_vals,&
      71             :                                               section_vals_type,&
      72             :                                               section_vals_val_get
      73             :    USE kinds,                           ONLY: dp
      74             :    USE machine,                         ONLY: m_flush,&
      75             :                                               m_walltime
      76             :    USE message_passing,                 ONLY: mp_para_env_type
      77             :    USE minimax_exp,                     ONLY: validate_exp_minimax
      78             :    USE mp2_grids,                       ONLY: test_least_square_ft
      79             :    USE mp_perf_test,                    ONLY: mpi_perf_test
      80             :    USE parallel_gemm_api,               ONLY: parallel_gemm
      81             :    USE parallel_rng_types,              ONLY: UNIFORM,&
      82             :                                               rng_stream_type
      83             :    USE pw_grid_types,                   ONLY: FULLSPACE,&
      84             :                                               HALFSPACE,&
      85             :                                               pw_grid_type
      86             :    USE pw_grids,                        ONLY: pw_grid_create,&
      87             :                                               pw_grid_release,&
      88             :                                               pw_grid_setup
      89             :    USE pw_methods,                      ONLY: pw_transfer,&
      90             :                                               pw_zero
      91             :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      92             :                                               pw_c3d_rs_type,&
      93             :                                               pw_r3d_rs_type
      94             :    USE realspace_grid_types,            ONLY: &
      95             :         realspace_grid_desc_type, realspace_grid_input_type, realspace_grid_type, rs_grid_create, &
      96             :         rs_grid_create_descriptor, rs_grid_print, rs_grid_release, rs_grid_release_descriptor, &
      97             :         rs_grid_zero, transfer_pw2rs, transfer_rs2pw
      98             :    USE shg_integrals_test,              ONLY: shg_integrals_perf_acc_test
      99             : #include "./base/base_uses.f90"
     100             : 
     101             :    IMPLICIT NONE
     102             : 
     103             :    PRIVATE
     104             :    PUBLIC :: lib_test
     105             : 
     106             :    INTEGER                  :: runtest(100)
     107             :    REAL(KIND=dp)           :: max_memory
     108             :    REAL(KIND=dp), PARAMETER :: threshold = 1.0E-8_dp
     109             :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'library_tests'
     110             : 
     111             : CONTAINS
     112             : 
     113             : ! **************************************************************************************************
     114             : !> \brief Master routine for tests
     115             : !> \param root_section ...
     116             : !> \param para_env ...
     117             : !> \param globenv ...
     118             : !> \par History
     119             : !>      none
     120             : !> \author JGH  6-NOV-2000
     121             : ! **************************************************************************************************
     122         720 :    SUBROUTINE lib_test(root_section, para_env, globenv)
     123             : 
     124             :       TYPE(section_vals_type), POINTER                   :: root_section
     125             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     126             :       TYPE(global_environment_type), POINTER             :: globenv
     127             : 
     128             :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'lib_test'
     129             : 
     130             :       INTEGER                                            :: handle, iw
     131             :       LOGICAL                                            :: explicit
     132             :       TYPE(cp_logger_type), POINTER                      :: logger
     133             :       TYPE(section_vals_type), POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
     134             :          dbm_test_section, eigensolver_section, eri_mme_test_section, pw_transfer_section, &
     135             :          rs_pw_transfer_section, shg_integrals_test_section
     136             : 
     137          80 :       CALL timeset(routineN, handle)
     138             : 
     139          80 :       logger => cp_get_default_logger()
     140          80 :       iw = cp_print_key_unit_nr(logger, root_section, "TEST%PROGRAM_RUN_INFO", extension=".log")
     141             : 
     142          80 :       IF (iw > 0) THEN
     143          40 :          WRITE (iw, '(T2,79("*"))')
     144          40 :          WRITE (iw, '(A,T31,A,T80,A)') ' *', ' PERFORMANCE TESTS ', '*'
     145          40 :          WRITE (iw, '(T2,79("*"))')
     146             :       END IF
     147             :       !
     148          80 :       CALL test_input(root_section, para_env)
     149             :       !
     150          80 :       IF (runtest(1) /= 0) CALL copy_test(para_env, iw)
     151             :       !
     152          80 :       IF (runtest(2) /= 0) CALL matmul_test(para_env, test_matmul=.TRUE., test_dgemm=.FALSE., iw=iw)
     153          80 :       IF (runtest(5) /= 0) CALL matmul_test(para_env, test_matmul=.FALSE., test_dgemm=.TRUE., iw=iw)
     154             :       !
     155          80 :       IF (runtest(3) /= 0) CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
     156           2 :                                          globenv%fftw_wisdom_file_name)
     157             :       !
     158          80 :       IF (runtest(4) /= 0) CALL eri_test(iw)
     159             :       !
     160          80 :       IF (runtest(6) /= 0) CALL clebsch_gordon_test()
     161             :       !
     162             :       ! runtest 7 has been deleted and can be recycled
     163             :       !
     164          80 :       IF (runtest(8) /= 0) CALL mpi_perf_test(para_env, runtest(8), iw)
     165             :       !
     166          80 :       IF (runtest(10) /= 0) CALL validate_exp_minimax(runtest(10), iw)
     167             :       !
     168          80 :       IF (runtest(11) /= 0) CALL test_least_square_ft(runtest(11), iw)
     169             :       !
     170             : 
     171          80 :       rs_pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%RS_PW_TRANSFER")
     172          80 :       CALL section_vals_get(rs_pw_transfer_section, explicit=explicit)
     173          80 :       IF (explicit) THEN
     174           2 :          CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
     175             :       END IF
     176             : 
     177          80 :       pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%PW_TRANSFER")
     178          80 :       CALL section_vals_get(pw_transfer_section, explicit=explicit)
     179          80 :       IF (explicit) THEN
     180          10 :          CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
     181             :       END IF
     182             : 
     183          80 :       cp_fm_gemm_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_FM_GEMM")
     184          80 :       CALL section_vals_get(cp_fm_gemm_test_section, explicit=explicit)
     185          80 :       IF (explicit) THEN
     186           4 :          CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
     187             :       END IF
     188             : 
     189          80 :       eigensolver_section => section_vals_get_subs_vals(root_section, "TEST%EIGENSOLVER")
     190          80 :       CALL section_vals_get(eigensolver_section, explicit=explicit)
     191          80 :       IF (explicit) THEN
     192           2 :          CALL eigensolver_test(para_env, iw, eigensolver_section)
     193             :       END IF
     194             : 
     195          80 :       eri_mme_test_section => section_vals_get_subs_vals(root_section, "TEST%ERI_MME_TEST")
     196          80 :       CALL section_vals_get(eri_mme_test_section, explicit=explicit)
     197          80 :       IF (explicit) THEN
     198           8 :          CALL cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
     199             :       END IF
     200             : 
     201          80 :       shg_integrals_test_section => section_vals_get_subs_vals(root_section, "TEST%SHG_INTEGRALS_TEST")
     202          80 :       CALL section_vals_get(shg_integrals_test_section, explicit=explicit)
     203          80 :       IF (explicit) THEN
     204           4 :          CALL shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
     205             :       END IF
     206             : 
     207             :       ! DBCSR tests
     208          80 :       cp_dbcsr_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_DBCSR")
     209          80 :       CALL section_vals_get(cp_dbcsr_test_section, explicit=explicit)
     210          80 :       IF (explicit) THEN
     211          32 :          CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
     212             :       END IF
     213             : 
     214             :       ! DBM tests
     215          80 :       dbm_test_section => section_vals_get_subs_vals(root_section, "TEST%DBM")
     216          80 :       CALL section_vals_get(dbm_test_section, explicit=explicit)
     217          80 :       IF (explicit) THEN
     218          14 :          CALL run_dbm_tests(para_env, iw, dbm_test_section)
     219             :       END IF
     220             : 
     221          80 :       CALL cp_print_key_finished_output(iw, logger, root_section, "TEST%PROGRAM_RUN_INFO")
     222             : 
     223          80 :       CALL timestop(handle)
     224             : 
     225          80 :    END SUBROUTINE lib_test
     226             : 
     227             : ! **************************************************************************************************
     228             : !> \brief Reads input section &TEST ... &END
     229             : !> \param root_section ...
     230             : !> \param para_env ...
     231             : !> \author JGH 30-NOV-2000
     232             : !> \note
     233             : !> I---------------------------------------------------------------------------I
     234             : !> I SECTION: &TEST ... &END                                                   I
     235             : !> I                                                                           I
     236             : !> I    MEMORY   max_memory                                                    I
     237             : !> I    COPY     n                                                             I
     238             : !> I    MATMUL   n                                                             I
     239             : !> I    FFT      n                                                             I
     240             : !> I    ERI      n                                                             I
     241             : !> I    PW_FFT   n                                                             I
     242             : !> I    Clebsch-Gordon n                                                       I
     243             : !> I    RS_GRIDS n                                                             I
     244             : !> I    MPI      n                                                             I
     245             : !> I    RNG      n             -> Parallel random number generator             I
     246             : !> I---------------------------------------------------------------------------I
     247             : ! **************************************************************************************************
     248          80 :    SUBROUTINE test_input(root_section, para_env)
     249             :       TYPE(section_vals_type), POINTER                   :: root_section
     250             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     251             : 
     252             :       TYPE(section_vals_type), POINTER                   :: test_section
     253             : 
     254             : !
     255             : !..defaults
     256             : ! using this style is not recommended, introduce sections instead (see e.g. cp_fm_gemm)
     257             : 
     258          80 :       runtest = 0
     259          80 :       test_section => section_vals_get_subs_vals(root_section, "TEST")
     260          80 :       CALL section_vals_val_get(test_section, "MEMORY", r_val=max_memory)
     261          80 :       CALL section_vals_val_get(test_section, 'COPY', i_val=runtest(1))
     262          80 :       CALL section_vals_val_get(test_section, 'MATMUL', i_val=runtest(2))
     263          80 :       CALL section_vals_val_get(test_section, 'DGEMM', i_val=runtest(5))
     264          80 :       CALL section_vals_val_get(test_section, 'FFT', i_val=runtest(3))
     265          80 :       CALL section_vals_val_get(test_section, 'ERI', i_val=runtest(4))
     266          80 :       CALL section_vals_val_get(test_section, 'CLEBSCH_GORDON', i_val=runtest(6))
     267          80 :       CALL section_vals_val_get(test_section, 'MPI', i_val=runtest(8))
     268          80 :       CALL section_vals_val_get(test_section, 'MINIMAX', i_val=runtest(10))
     269          80 :       CALL section_vals_val_get(test_section, 'LEAST_SQ_FT', i_val=runtest(11))
     270             : 
     271          80 :       CALL para_env%sync()
     272          80 :    END SUBROUTINE test_input
     273             : 
     274             : ! **************************************************************************************************
     275             : !> \brief Tests the performance to copy two vectors.
     276             : !> \param para_env ...
     277             : !> \param iw ...
     278             : !> \par History
     279             : !>      none
     280             : !> \author JGH  6-NOV-2000
     281             : !> \note
     282             : !>      The results of these tests allow to determine the size of the cache
     283             : !>      of the CPU. This can be used to optimize the performance of the
     284             : !>      FFTSG library.
     285             : ! **************************************************************************************************
     286           2 :    SUBROUTINE copy_test(para_env, iw)
     287             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     288             :       INTEGER                                            :: iw
     289             : 
     290             :       INTEGER                                            :: i, ierr, j, len, ntim, siz
     291             :       REAL(KIND=dp)                                      :: perf, t, tend, tstart
     292           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: ca, cb
     293             : 
     294             : ! test for copy --> Cache size
     295             : 
     296           2 :       siz = ABS(runtest(1))
     297           2 :       IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of copy ( F95 ) "
     298          34 :       DO i = 6, 24
     299          34 :          len = 2**i
     300          34 :          IF (8.0_dp*REAL(len, KIND=dp) > max_memory*0.5_dp) EXIT
     301          96 :          ALLOCATE (ca(len), STAT=ierr)
     302          32 :          IF (ierr /= 0) EXIT
     303          96 :          ALLOCATE (cb(len), STAT=ierr)
     304          32 :          IF (ierr /= 0) EXIT
     305             : 
     306          32 :          CALL RANDOM_NUMBER(ca)
     307          32 :          ntim = NINT(1.e7_dp/REAL(len, KIND=dp))
     308          32 :          ntim = MAX(ntim, 1)
     309          32 :          ntim = MIN(ntim, siz*10000)
     310             : 
     311          32 :          tstart = m_walltime()
     312      512524 :          DO j = 1, ntim
     313   315058412 :             cb(:) = ca(:)
     314      512524 :             ca(1) = REAL(j, KIND=dp)
     315             :          END DO
     316          32 :          tend = m_walltime()
     317          32 :          t = tend - tstart + threshold
     318          32 :          IF (t > 0.0_dp) THEN
     319          32 :             perf = REAL(ntim, KIND=dp)*REAL(len, KIND=dp)*1.e-6_dp/t
     320             :          ELSE
     321           0 :             perf = 0.0_dp
     322             :          END IF
     323             : 
     324          32 :          IF (para_env%is_source()) THEN
     325          16 :             WRITE (iw, '(A,i2,i10,A,T59,F14.4,A)') " Copy test:   Size = 2^", i, &
     326          32 :                len/1024, " Kwords", perf, " Mcopy/s"
     327             :          END IF
     328             : 
     329          32 :          DEALLOCATE (ca)
     330          34 :          DEALLOCATE (cb)
     331             :       END DO
     332           2 :       CALL para_env%sync()
     333           2 :    END SUBROUTINE copy_test
     334             : 
     335             : ! **************************************************************************************************
     336             : !> \brief Tests the performance of different kinds of matrix matrix multiply
     337             : !>      kernels for the BLAS and F95 intrinsic matmul.
     338             : !> \param para_env ...
     339             : !> \param test_matmul ...
     340             : !> \param test_dgemm ...
     341             : !> \param iw ...
     342             : !> \par History
     343             : !>      none
     344             : !> \author JGH  6-NOV-2000
     345             : ! **************************************************************************************************
     346           2 :    SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
     347             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     348             :       LOGICAL                                            :: test_matmul, test_dgemm
     349             :       INTEGER                                            :: iw
     350             : 
     351             :       INTEGER                                            :: i, ierr, j, len, ntim, siz
     352             :       REAL(KIND=dp)                                      :: perf, t, tend, tstart, xdum
     353           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: ma, mb, mc
     354             : 
     355             : ! test for matrix multpies
     356             : 
     357           2 :       IF (test_matmul) THEN
     358           2 :          siz = ABS(runtest(2))
     359           2 :          IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( F95 ) "
     360           2 :          DO i = 5, siz, 2
     361           4 :             len = 2**i + 1
     362           4 :             IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
     363          16 :             ALLOCATE (ma(len, len), STAT=ierr)
     364           4 :             IF (ierr /= 0) EXIT
     365          16 :             ALLOCATE (mb(len, len), STAT=ierr)
     366           4 :             IF (ierr /= 0) EXIT
     367          16 :             ALLOCATE (mc(len, len), STAT=ierr)
     368           4 :             IF (ierr /= 0) EXIT
     369       35788 :             mc = 0.0_dp
     370             : 
     371           4 :             CALL RANDOM_NUMBER(xdum)
     372       35788 :             ma = xdum
     373           4 :             CALL RANDOM_NUMBER(xdum)
     374       35788 :             mb = xdum
     375           4 :             ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
     376           4 :             ntim = MAX(ntim, 1)
     377           4 :             ntim = MIN(ntim, siz*200)
     378           4 :             tstart = m_walltime()
     379        2832 :             DO j = 1, ntim
     380        2828 :                mc(:, :) = MATMUL(ma, mb)
     381        2832 :                ma(1, 1) = REAL(j, KIND=dp)
     382             :             END DO
     383           4 :             tend = m_walltime()
     384           4 :             t = tend - tstart + threshold
     385           4 :             perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     386           4 :             IF (para_env%is_source()) THEN
     387             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     388           2 :                   " Matrix multiply test: c = a * b         Size = ", len, perf, " Mflop/s"
     389             :             END IF
     390           4 :             tstart = m_walltime()
     391        2832 :             DO j = 1, ntim
     392     3895652 :                mc(:, :) = mc + MATMUL(ma, mb)
     393        2832 :                ma(1, 1) = REAL(j, KIND=dp)
     394             :             END DO
     395           4 :             tend = m_walltime()
     396           4 :             t = tend - tstart + threshold
     397           4 :             IF (t > 0.0_dp) THEN
     398           4 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     399             :             ELSE
     400           0 :                perf = 0.0_dp
     401             :             END IF
     402             : 
     403           4 :             IF (para_env%is_source()) THEN
     404             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     405           2 :                   " Matrix multiply test: a = a * b         Size = ", len, perf, " Mflop/s"
     406             :             END IF
     407             : 
     408           4 :             tstart = m_walltime()
     409        2832 :             DO j = 1, ntim
     410     3895652 :                mc(:, :) = mc + MATMUL(ma, TRANSPOSE(mb))
     411        2832 :                ma(1, 1) = REAL(j, KIND=dp)
     412             :             END DO
     413           4 :             tend = m_walltime()
     414           4 :             t = tend - tstart + threshold
     415           4 :             IF (t > 0.0_dp) THEN
     416           4 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     417             :             ELSE
     418           0 :                perf = 0.0_dp
     419             :             END IF
     420             : 
     421           4 :             IF (para_env%is_source()) THEN
     422             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     423           2 :                   " Matrix multiply test: c = a * b(T)      Size = ", len, perf, " Mflop/s"
     424             :             END IF
     425             : 
     426           4 :             tstart = m_walltime()
     427        2832 :             DO j = 1, ntim
     428     3895652 :                mc(:, :) = mc + MATMUL(TRANSPOSE(ma), mb)
     429        2832 :                ma(1, 1) = REAL(j, KIND=dp)
     430             :             END DO
     431           4 :             tend = m_walltime()
     432           4 :             t = tend - tstart + threshold
     433           4 :             IF (t > 0.0_dp) THEN
     434           4 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     435             :             ELSE
     436           0 :                perf = 0.0_dp
     437             :             END IF
     438             : 
     439           4 :             IF (para_env%is_source()) THEN
     440             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     441           2 :                   " Matrix multiply test: c = a(T) * b      Size = ", len, perf, " Mflop/s"
     442             :             END IF
     443             : 
     444           4 :             DEALLOCATE (ma)
     445           4 :             DEALLOCATE (mb)
     446           4 :             DEALLOCATE (mc)
     447             :          END DO
     448             :       END IF
     449             : 
     450             :       ! test for matrix multpies
     451           2 :       IF (test_dgemm) THEN
     452           0 :          siz = ABS(runtest(5))
     453           0 :          IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( BLAS ) "
     454           0 :          DO i = 5, siz, 2
     455           0 :             len = 2**i + 1
     456           0 :             IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
     457           0 :             ALLOCATE (ma(len, len), STAT=ierr)
     458           0 :             IF (ierr /= 0) EXIT
     459           0 :             ALLOCATE (mb(len, len), STAT=ierr)
     460           0 :             IF (ierr /= 0) EXIT
     461           0 :             ALLOCATE (mc(len, len), STAT=ierr)
     462           0 :             IF (ierr /= 0) EXIT
     463           0 :             mc = 0.0_dp
     464             : 
     465           0 :             CALL RANDOM_NUMBER(xdum)
     466           0 :             ma = xdum
     467           0 :             CALL RANDOM_NUMBER(xdum)
     468           0 :             mb = xdum
     469           0 :             ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
     470           0 :             ntim = MAX(ntim, 1)
     471           0 :             ntim = MIN(ntim, 1000)
     472             : 
     473           0 :             tstart = m_walltime()
     474           0 :             DO j = 1, ntim
     475           0 :                CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     476             :             END DO
     477           0 :             tend = m_walltime()
     478           0 :             t = tend - tstart + threshold
     479           0 :             IF (t > 0.0_dp) THEN
     480           0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     481             :             ELSE
     482           0 :                perf = 0.0_dp
     483             :             END IF
     484             : 
     485           0 :             IF (para_env%is_source()) THEN
     486             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     487           0 :                   " Matrix multiply test: c = a * b         Size = ", len, perf, " Mflop/s"
     488             :             END IF
     489             : 
     490           0 :             tstart = m_walltime()
     491           0 :             DO j = 1, ntim
     492           0 :                CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     493             :             END DO
     494           0 :             tend = m_walltime()
     495           0 :             t = tend - tstart + threshold
     496           0 :             IF (t > 0.0_dp) THEN
     497           0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     498             :             ELSE
     499           0 :                perf = 0.0_dp
     500             :             END IF
     501             : 
     502           0 :             IF (para_env%is_source()) THEN
     503             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     504           0 :                   " Matrix multiply test: a = a * b         Size = ", len, perf, " Mflop/s"
     505             :             END IF
     506             : 
     507           0 :             tstart = m_walltime()
     508           0 :             DO j = 1, ntim
     509           0 :                CALL dgemm("N", "T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     510             :             END DO
     511           0 :             tend = m_walltime()
     512           0 :             t = tend - tstart + threshold
     513           0 :             IF (t > 0.0_dp) THEN
     514           0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     515             :             ELSE
     516           0 :                perf = 0.0_dp
     517             :             END IF
     518             : 
     519           0 :             IF (para_env%is_source()) THEN
     520             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     521           0 :                   " Matrix multiply test: c = a * b(T)      Size = ", len, perf, " Mflop/s"
     522             :             END IF
     523             : 
     524           0 :             tstart = m_walltime()
     525           0 :             DO j = 1, ntim
     526           0 :                CALL dgemm("T", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     527             :             END DO
     528           0 :             tend = m_walltime()
     529           0 :             t = tend - tstart + threshold
     530           0 :             IF (t > 0.0_dp) THEN
     531           0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     532             :             ELSE
     533           0 :                perf = 0.0_dp
     534             :             END IF
     535             : 
     536           0 :             IF (para_env%is_source()) THEN
     537             :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     538           0 :                   " Matrix multiply test: c = a(T) * b      Size = ", len, perf, " Mflop/s"
     539             :             END IF
     540             : 
     541           0 :             DEALLOCATE (ma)
     542           0 :             DEALLOCATE (mb)
     543           0 :             DEALLOCATE (mc)
     544             :          END DO
     545             :       END IF
     546             : 
     547           2 :       CALL para_env%sync()
     548             : 
     549           2 :    END SUBROUTINE matmul_test
     550             : 
     551             : ! **************************************************************************************************
     552             : !> \brief Tests the performance of all available FFT libraries for 3D FFTs
     553             : !> \param para_env ...
     554             : !> \param iw ...
     555             : !> \param fftw_plan_type ...
     556             : !> \param wisdom_file where FFTW3 should look to save/load wisdom
     557             : !> \par History
     558             : !>      none
     559             : !> \author JGH  6-NOV-2000
     560             : ! **************************************************************************************************
     561           2 :    SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
     562             : 
     563             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     564             :       INTEGER                                            :: iw, fftw_plan_type
     565             :       CHARACTER(LEN=*), INTENT(IN)                       :: wisdom_file
     566             : 
     567             :       INTEGER, PARAMETER                                 :: ndate(3) = (/12, 48, 96/)
     568             : 
     569             :       INTEGER                                            :: iall, ierr, it, j, len, n(3), ntim, &
     570             :                                                             radix_in, radix_out, siz, stat
     571             :       COMPLEX(KIND=dp), DIMENSION(4, 4, 4)               :: zz
     572           2 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)  :: ca, cb, cc
     573             :       CHARACTER(LEN=7)                                   :: method
     574             :       REAL(KIND=dp)                                      :: flops, perf, scale, t, tdiff, tend, &
     575             :                                                             tstart
     576           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: ra
     577             : 
     578             : ! test for 3d FFT
     579             : 
     580           2 :       IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of 3D-FFT "
     581           2 :       siz = ABS(runtest(3))
     582             : 
     583           8 :       DO iall = 1, 100
     584             :          SELECT CASE (iall)
     585             :          CASE DEFAULT
     586           2 :             EXIT
     587             :          CASE (1)
     588             :             CALL init_fft("FFTSG", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
     589           2 :                           pool_limit=10, plan_style=fftw_plan_type)
     590           2 :             method = "FFTSG  "
     591             :          CASE (2)
     592           2 :             CYCLE
     593             :          CASE (3)
     594             :             CALL init_fft("FFTW3", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
     595           2 :                           pool_limit=10, plan_style=fftw_plan_type)
     596          10 :             method = "FFTW3  "
     597             :          END SELECT
     598          16 :          n = 4
     599           4 :          zz = 0.0_dp
     600           4 :          CALL fft3d(1, n, zz, status=stat)
     601           4 :          IF (stat == 0) THEN
     602          16 :             DO it = 1, 3
     603          12 :                radix_in = ndate(it)
     604          12 :                CALL fft_radix_operations(radix_in, radix_out, FFT_RADIX_CLOSEST)
     605          12 :                len = radix_out
     606          48 :                n = len
     607          12 :                IF (16.0_dp*REAL(len*len*len, KIND=dp) > max_memory*0.5_dp) EXIT
     608          60 :                ALLOCATE (ra(len, len, len), STAT=ierr)
     609          60 :                ALLOCATE (ca(len, len, len), STAT=ierr)
     610          12 :                CALL RANDOM_NUMBER(ra)
     611     4035516 :                ca(:, :, :) = ra
     612          12 :                CALL RANDOM_NUMBER(ra)
     613     4035516 :                ca(:, :, :) = ca + CMPLX(0.0_dp, 1.0_dp, KIND=dp)*ra
     614          12 :                flops = REAL(len**3, KIND=dp)*15.0_dp*LOG(REAL(len, KIND=dp))
     615          12 :                ntim = NINT(siz*1.e7_dp/flops)
     616          12 :                ntim = MAX(ntim, 1)
     617          12 :                ntim = MIN(ntim, 200)
     618          12 :                scale = 1.0_dp/REAL(len**3, KIND=dp)
     619          12 :                tstart = m_walltime()
     620         884 :                DO j = 1, ntim
     621         872 :                   CALL fft3d(FWFFT, n, ca)
     622         884 :                   CALL fft3d(BWFFT, n, ca, SCALE=scale)
     623             :                END DO
     624          12 :                tend = m_walltime()
     625          12 :                t = tend - tstart + threshold
     626          12 :                IF (t > 0.0_dp) THEN
     627          12 :                   perf = REAL(ntim, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
     628             :                ELSE
     629           0 :                   perf = 0.0_dp
     630             :                END IF
     631             : 
     632          12 :                IF (para_env%is_source()) THEN
     633             :                   WRITE (iw, '(T2,A,A,i6,T59,F14.4,A)') &
     634           6 :                      ADJUSTR(method), " test (in-place)    Size = ", len, perf, " Mflop/s"
     635             :                END IF
     636          12 :                DEALLOCATE (ca)
     637          16 :                DEALLOCATE (ra)
     638             :             END DO
     639           4 :             IF (para_env%is_source()) WRITE (iw, *)
     640             :             ! test if input data is preserved
     641           4 :             len = 24
     642          16 :             n = len
     643           4 :             ALLOCATE (ra(len, len, len))
     644           4 :             ALLOCATE (ca(len, len, len))
     645           4 :             ALLOCATE (cb(len, len, len))
     646           4 :             ALLOCATE (cc(len, len, len))
     647           4 :             CALL RANDOM_NUMBER(ra)
     648       57700 :             ca(:, :, :) = ra
     649           4 :             CALL RANDOM_NUMBER(ra)
     650       57700 :             ca(:, :, :) = ca + CMPLX(0.0_dp, 1.0_dp, KIND=dp)*ra
     651       57700 :             cc(:, :, :) = ca
     652           4 :             CALL fft3d(FWFFT, n, ca, cb)
     653       57700 :             tdiff = MAXVAL(ABS(ca - cc))
     654           4 :             IF (tdiff > 1.0E-12_dp) THEN
     655           0 :                IF (para_env%is_source()) &
     656           0 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         FWFFT ", &
     657           0 :                   "             Input array is changed in out-of-place FFT !"
     658             :             ELSE
     659           4 :                IF (para_env%is_source()) &
     660           2 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         FWFFT ", &
     661           4 :                   "         Input array is not changed in out-of-place FFT !"
     662             :             END IF
     663       57700 :             ca(:, :, :) = cc
     664           4 :             CALL fft3d(BWFFT, n, ca, cb)
     665       57700 :             tdiff = MAXVAL(ABS(ca - cc))
     666           4 :             IF (tdiff > 1.0E-12_dp) THEN
     667           0 :                IF (para_env%is_source()) &
     668           0 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         BWFFT ", &
     669           0 :                   "             Input array is changed in out-of-place FFT !"
     670             :             ELSE
     671           4 :                IF (para_env%is_source()) &
     672           2 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         BWFFT ", &
     673           4 :                   "         Input array is not changed in out-of-place FFT !"
     674             :             END IF
     675           4 :             IF (para_env%is_source()) WRITE (iw, *)
     676             : 
     677           4 :             DEALLOCATE (ra)
     678           4 :             DEALLOCATE (ca)
     679           4 :             DEALLOCATE (cb)
     680           4 :             DEALLOCATE (cc)
     681             :          END IF
     682           4 :          CALL finalize_fft(para_env, wisdom_file=wisdom_file)
     683             :       END DO
     684             : 
     685           2 :    END SUBROUTINE fft_test
     686             : 
     687             : ! **************************************************************************************************
     688             : !> \brief   test rs_pw_transfer performance
     689             : !> \param para_env ...
     690             : !> \param iw ...
     691             : !> \param globenv ...
     692             : !> \param rs_pw_transfer_section ...
     693             : !> \author  Joost VandeVondele
     694             : !>      9.2008 Randomise rs grid [Iain Bethune]
     695             : !>      (c) The Numerical Algorithms Group (NAG) Ltd, 2008 on behalf of the HECToR project
     696             : ! **************************************************************************************************
     697           2 :    SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
     698             : 
     699             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     700             :       INTEGER                                            :: iw
     701             :       TYPE(global_environment_type), POINTER             :: globenv
     702             :       TYPE(section_vals_type), POINTER                   :: rs_pw_transfer_section
     703             : 
     704             :       CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_pw_transfer_test'
     705             : 
     706             :       INTEGER                                            :: halo_size, handle, i_loop, n_loop, ns_max
     707             :       INTEGER, DIMENSION(3)                              :: no, np
     708           2 :       INTEGER, DIMENSION(:), POINTER                     :: i_vals
     709             :       LOGICAL                                            :: do_rs2pw
     710             :       REAL(KIND=dp)                                      :: tend, tstart
     711             :       TYPE(cell_type), POINTER                           :: box
     712             :       TYPE(pw_grid_type), POINTER                        :: grid
     713             :       TYPE(pw_r3d_rs_type)                               :: ca
     714             :       TYPE(realspace_grid_desc_type), POINTER            :: rs_desc
     715             :       TYPE(realspace_grid_input_type)                    :: input_settings
     716          32 :       TYPE(realspace_grid_type)                          :: rs_grid
     717             :       TYPE(section_vals_type), POINTER                   :: rs_grid_section
     718             : 
     719           2 :       CALL timeset(routineN, handle)
     720             : 
     721             :       !..set fft lib
     722             :       CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
     723             :                     pool_limit=globenv%fft_pool_scratch_limit, &
     724             :                     wisdom_file=globenv%fftw_wisdom_file_name, &
     725           2 :                     plan_style=globenv%fftw_plan_type)
     726             : 
     727             :       ! .. set cell (should otherwise be irrelevant)
     728           2 :       NULLIFY (box)
     729           2 :       CALL cell_create(box)
     730             :       box%hmat = RESHAPE((/20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
     731          26 :                            0.0_dp, 0.0_dp, 20.0_dp/), (/3, 3/))
     732           2 :       CALL init_cell(box)
     733             : 
     734             :       ! .. grid type and pw_grid
     735           2 :       NULLIFY (grid)
     736           2 :       CALL section_vals_val_get(rs_pw_transfer_section, "GRID", i_vals=i_vals)
     737           8 :       np = i_vals
     738           2 :       CALL pw_grid_create(grid, para_env)
     739           2 :       CALL pw_grid_setup(box%hmat, grid, grid_span=FULLSPACE, npts=np, fft_usage=.TRUE., iounit=iw)
     740           8 :       no = grid%npts
     741             : 
     742           2 :       CALL ca%create(grid)
     743           2 :       CALL pw_zero(ca)
     744             : 
     745             :       ! .. rs input setting type
     746           2 :       CALL section_vals_val_get(rs_pw_transfer_section, "HALO_SIZE", i_val=halo_size)
     747           2 :       rs_grid_section => section_vals_get_subs_vals(rs_pw_transfer_section, "RS_GRID")
     748           2 :       ns_max = 2*halo_size + 1
     749           2 :       CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, (/-1, -1, -1/))
     750             : 
     751             :       ! .. rs type
     752           2 :       NULLIFY (rs_desc)
     753           2 :       CALL rs_grid_create_descriptor(rs_desc, pw_grid=grid, input_settings=input_settings)
     754           2 :       CALL rs_grid_create(rs_grid, rs_desc)
     755           2 :       CALL rs_grid_print(rs_grid, iw)
     756           2 :       CALL rs_grid_zero(rs_grid)
     757             : 
     758             :       ! Put random values on the grid, so summation check will pick up errors
     759           2 :       CALL RANDOM_NUMBER(rs_grid%r)
     760             : 
     761           2 :       CALL section_vals_val_get(rs_pw_transfer_section, "N_loop", i_val=N_loop)
     762           2 :       CALL section_vals_val_get(rs_pw_transfer_section, "RS2PW", l_val=do_rs2pw)
     763             : 
     764             :       ! go for the real loops, sync to get max timings
     765           2 :       IF (para_env%is_source()) THEN
     766           1 :          WRITE (iw, '(T2,A)') ""
     767           1 :          WRITE (iw, '(T2,A)') "Timing rs_pw_transfer routine"
     768           1 :          WRITE (iw, '(T2,A)') ""
     769           1 :          WRITE (iw, '(T2,A)') "iteration      time[s]"
     770             :       END IF
     771           8 :       DO i_loop = 1, N_loop
     772           6 :          CALL para_env%sync()
     773           6 :          tstart = m_walltime()
     774           6 :          IF (do_rs2pw) THEN
     775           6 :             CALL transfer_rs2pw(rs_grid, ca)
     776             :          ELSE
     777           0 :             CALL transfer_pw2rs(rs_grid, ca)
     778             :          END IF
     779           6 :          CALL para_env%sync()
     780           6 :          tend = m_walltime()
     781           8 :          IF (para_env%is_source()) THEN
     782           3 :             WRITE (iw, '(T2,I9,1X,F12.6)') i_loop, tend - tstart
     783             :          END IF
     784             :       END DO
     785             : 
     786             :       !cleanup
     787           2 :       CALL rs_grid_release(rs_grid)
     788           2 :       CALL rs_grid_release_descriptor(rs_desc)
     789           2 :       CALL ca%release()
     790           2 :       CALL pw_grid_release(grid)
     791           2 :       CALL cell_release(box)
     792           2 :       CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
     793             : 
     794           2 :       CALL timestop(handle)
     795             : 
     796          10 :    END SUBROUTINE rs_pw_transfer_test
     797             : 
     798             : ! **************************************************************************************************
     799             : !> \brief Tests the performance of PW calls to FFT routines
     800             : !> \param para_env ...
     801             : !> \param iw ...
     802             : !> \param globenv ...
     803             : !> \param pw_transfer_section ...
     804             : !> \par History
     805             : !>      JGH  6-Feb-2001 : Test and performance code
     806             : !>      Made input sensitive [Joost VandeVondele]
     807             : !> \author JGH  1-JAN-2001
     808             : ! **************************************************************************************************
     809          10 :    SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
     810             : 
     811             :       TYPE(mp_para_env_type), POINTER                    :: para_env
     812             :       INTEGER                                            :: iw
     813             :       TYPE(global_environment_type), POINTER             :: globenv
     814             :       TYPE(section_vals_type), POINTER                   :: pw_transfer_section
     815             : 
     816             :       REAL(KIND=dp), PARAMETER                           :: toler = 1.e-11_dp
     817             : 
     818             :       INTEGER                                            :: blocked_id, grid_span, i_layout, i_rep, &
     819             :                                                             ig, ip, itmp, n_loop, n_rep, nn, p, q
     820          10 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: layouts
     821             :       INTEGER, DIMENSION(2)                              :: distribution_layout
     822             :       INTEGER, DIMENSION(3)                              :: no, np
     823          10 :       INTEGER, DIMENSION(:), POINTER                     :: i_vals
     824             :       LOGICAL                                            :: debug, is_fullspace, odd, &
     825             :                                                             pw_grid_layout_all, spherical
     826             :       REAL(KIND=dp)                                      :: em, et, flops, gsq, perf, t, t_max, &
     827             :                                                             t_min, tend, tstart
     828          10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: t_end, t_start
     829             :       TYPE(cell_type), POINTER                           :: box
     830             :       TYPE(pw_c1d_gs_type)                               :: ca, cc
     831             :       TYPE(pw_c3d_rs_type)                               :: cb
     832             :       TYPE(pw_grid_type), POINTER                        :: grid
     833             : 
     834             : !..set fft lib
     835             : 
     836             :       CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
     837             :                     pool_limit=globenv%fft_pool_scratch_limit, &
     838             :                     wisdom_file=globenv%fftw_wisdom_file_name, &
     839          10 :                     plan_style=globenv%fftw_plan_type)
     840             : 
     841             :       !..the unit cell (should not really matter, the number of grid points do)
     842          10 :       NULLIFY (box, grid)
     843          10 :       CALL cell_create(box)
     844             :       box%hmat = RESHAPE((/10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
     845         130 :                            0.0_dp, 0.0_dp, 7.0_dp/), (/3, 3/))
     846          10 :       CALL init_cell(box)
     847             : 
     848          10 :       CALL section_vals_get(pw_transfer_section, n_repetition=n_rep)
     849          70 :       DO i_rep = 1, n_rep
     850             : 
     851             :          ! how often should we do the transfer
     852          60 :          CALL section_vals_val_get(pw_transfer_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
     853         180 :          ALLOCATE (t_start(N_loop))
     854         180 :          ALLOCATE (t_end(N_loop))
     855             : 
     856             :          ! setup of the grids
     857          60 :          CALL section_vals_val_get(pw_transfer_section, "GRID", i_rep_section=i_rep, i_vals=i_vals)
     858         240 :          np = i_vals
     859             : 
     860          60 :          CALL section_vals_val_get(pw_transfer_section, "PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
     861          60 :          CALL section_vals_val_get(pw_transfer_section, "DEBUG", i_rep_section=i_rep, l_val=debug)
     862             : 
     863             :          CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT_ALL", i_rep_section=i_rep, &
     864          60 :                                    l_val=pw_grid_layout_all)
     865             : 
     866             :          ! prepare to loop over all or a specific layout
     867          60 :          IF (pw_grid_layout_all) THEN
     868             :             ! count layouts that fit
     869           8 :             itmp = 0
     870             :             ! start from 2, (/1,para_env%num_pe/) is not supported
     871          16 :             DO p = 2, para_env%num_pe
     872           8 :                q = para_env%num_pe/p
     873          16 :                IF (p*q == para_env%num_pe) THEN
     874           8 :                   itmp = itmp + 1
     875             :                END IF
     876             :             END DO
     877             :             ! build list
     878          24 :             ALLOCATE (layouts(2, itmp))
     879           8 :             itmp = 0
     880          16 :             DO p = 2, para_env%num_pe
     881           8 :                q = para_env%num_pe/p
     882          16 :                IF (p*q == para_env%num_pe) THEN
     883           8 :                   itmp = itmp + 1
     884          24 :                   layouts(:, itmp) = (/p, q/)
     885             :                END IF
     886             :             END DO
     887             :          ELSE
     888          52 :             CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
     889          52 :             ALLOCATE (layouts(2, 1))
     890         156 :             layouts(:, 1) = i_vals
     891             :          END IF
     892             : 
     893         120 :          DO i_layout = 1, SIZE(layouts, 2)
     894             : 
     895         180 :             distribution_layout = layouts(:, i_layout)
     896             : 
     897          60 :             CALL pw_grid_create(grid, para_env)
     898             : 
     899          60 :             CALL section_vals_val_get(pw_transfer_section, "PW_GRID", i_rep_section=i_rep, i_val=itmp)
     900             : 
     901             :             ! from cp_control_utils
     902          16 :             SELECT CASE (itmp)
     903             :             CASE (do_pwgrid_spherical)
     904          16 :                spherical = .TRUE.
     905          16 :                is_fullspace = .FALSE.
     906             :             CASE (do_pwgrid_ns_fullspace)
     907          28 :                spherical = .FALSE.
     908          28 :                is_fullspace = .TRUE.
     909             :             CASE (do_pwgrid_ns_halfspace)
     910          16 :                spherical = .FALSE.
     911          60 :                is_fullspace = .FALSE.
     912             :             END SELECT
     913             : 
     914             :             ! from pw_env_methods
     915          60 :             IF (spherical) THEN
     916          16 :                grid_span = HALFSPACE
     917          16 :                spherical = .TRUE.
     918          16 :                odd = .TRUE.
     919          44 :             ELSE IF (is_fullspace) THEN
     920          28 :                grid_span = FULLSPACE
     921          28 :                spherical = .FALSE.
     922          28 :                odd = .FALSE.
     923             :             ELSE
     924          16 :                grid_span = HALFSPACE
     925          16 :                spherical = .FALSE.
     926          16 :                odd = .TRUE.
     927             :             END IF
     928             : 
     929             :             ! actual setup
     930             :             CALL pw_grid_setup(box%hmat, grid, grid_span=grid_span, odd=odd, spherical=spherical, &
     931             :                                blocked=blocked_id, npts=np, fft_usage=.TRUE., &
     932          60 :                                rs_dims=distribution_layout, iounit=iw)
     933             : 
     934          60 :             IF (iw > 0) CALL m_flush(iw)
     935             : 
     936             :             ! note that the number of grid points might be different from what the user requested (fft-able needed)
     937         240 :             no = grid%npts
     938             : 
     939          60 :             CALL ca%create(grid)
     940          60 :             CALL cb%create(grid)
     941          60 :             CALL cc%create(grid)
     942             : 
     943             :             ! initialize data
     944          60 :             CALL pw_zero(ca)
     945          60 :             CALL pw_zero(cb)
     946          60 :             CALL pw_zero(cc)
     947          60 :             nn = SIZE(ca%array)
     948      142042 :             DO ig = 1, nn
     949      141982 :                gsq = grid%gsq(ig)
     950      142042 :                ca%array(ig) = EXP(-gsq)
     951             :             END DO
     952             : 
     953         420 :             flops = PRODUCT(no)*30.0_dp*LOG(REAL(MAXVAL(no), KIND=dp))
     954          60 :             tstart = m_walltime()
     955         596 :             DO ip = 1, n_loop
     956         536 :                CALL para_env%sync()
     957         536 :                t_start(ip) = m_walltime()
     958         536 :                CALL pw_transfer(ca, cb, debug)
     959         536 :                CALL pw_transfer(cb, cc, debug)
     960         536 :                CALL para_env%sync()
     961         596 :                t_end(ip) = m_walltime()
     962             :             END DO
     963          60 :             tend = m_walltime()
     964          60 :             t = tend - tstart + threshold
     965          60 :             IF (t > 0.0_dp) THEN
     966          60 :                perf = REAL(n_loop, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
     967             :             ELSE
     968           0 :                perf = 0.0_dp
     969             :             END IF
     970             : 
     971      142102 :             em = MAXVAL(ABS(ca%array(:) - cc%array(:)))
     972          60 :             CALL para_env%max(em)
     973      142042 :             et = SUM(ABS(ca%array(:) - cc%array(:)))
     974          60 :             CALL para_env%sum(et)
     975         656 :             t_min = MINVAL(t_end - t_start)
     976         656 :             t_max = MAXVAL(t_end - t_start)
     977             : 
     978          60 :             IF (para_env%is_source()) THEN
     979          30 :                WRITE (iw, *)
     980          30 :                WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Maximal Error ", em
     981          30 :                WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Total Error ", et
     982             :                WRITE (iw, '(A,T67,F14.0)') &
     983          30 :                   " Parallel FFT Tests: Performance [Mflops] ", perf
     984          30 :                WRITE (iw, '(A,T67,F14.6)') " Best time : ", t_min
     985          30 :                WRITE (iw, '(A,T67,F14.6)') " Worst time: ", t_max
     986          30 :                IF (iw > 0) CALL m_flush(iw)
     987             :             END IF
     988             : 
     989             :             ! need debugging ???
     990          60 :             IF (em > toler .OR. et > toler) THEN
     991           0 :                CPWARN("The FFT results are not accurate ... starting debug pw_transfer")
     992           0 :                CALL pw_transfer(ca, cb, .TRUE.)
     993           0 :                CALL pw_transfer(cb, cc, .TRUE.)
     994             :             END IF
     995             : 
     996             :             ! done with these grids
     997          60 :             CALL ca%release()
     998          60 :             CALL cb%release()
     999          60 :             CALL cc%release()
    1000         180 :             CALL pw_grid_release(grid)
    1001             : 
    1002             :          END DO
    1003             : 
    1004             :          ! local arrays
    1005          60 :          DEALLOCATE (layouts)
    1006          60 :          DEALLOCATE (t_start)
    1007         190 :          DEALLOCATE (t_end)
    1008             : 
    1009             :       END DO
    1010             : 
    1011             :       ! cleanup
    1012          10 :       CALL cell_release(box)
    1013          10 :       CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
    1014             : 
    1015          20 :    END SUBROUTINE pw_fft_test
    1016             : 
    1017             : ! **************************************************************************************************
    1018             : !> \brief Tests the eigensolver library routines
    1019             : !> \param para_env ...
    1020             : !> \param iw ...
    1021             : !> \param eigensolver_section ...
    1022             : !> \par History
    1023             : !>      JGH  6-Feb-2001 : Test and performance code
    1024             : !> \author JGH  1-JAN-2001
    1025             : ! **************************************************************************************************
    1026           2 :    SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
    1027             : 
    1028             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1029             :       INTEGER                                            :: iw
    1030             :       TYPE(section_vals_type), POINTER                   :: eigensolver_section
    1031             : 
    1032             :       INTEGER                                            :: diag_method, i, i_loop, i_rep, &
    1033             :                                                             init_method, j, n, n_loop, n_rep, &
    1034             :                                                             neig, unit_number
    1035             :       REAL(KIND=dp)                                      :: t1, t2
    1036           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eigenvalues
    1037           2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: buffer
    1038             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1039             :       TYPE(cp_fm_struct_type), POINTER                   :: fmstruct
    1040             :       TYPE(cp_fm_type)                                   :: eigenvectors, matrix, work
    1041           2 :       TYPE(rng_stream_type), ALLOCATABLE                 :: rng_stream
    1042             : 
    1043           2 :       IF (iw > 0) THEN
    1044           1 :          WRITE (UNIT=iw, FMT="(/,/,T2,A,/)") "EIGENSOLVER TEST"
    1045             :       END IF
    1046             : 
    1047             :       ! create blacs env corresponding to para_env
    1048           2 :       NULLIFY (blacs_env)
    1049             :       CALL cp_blacs_env_create(blacs_env=blacs_env, &
    1050           2 :                                para_env=para_env)
    1051             : 
    1052             :       ! loop over all tests
    1053           2 :       CALL section_vals_get(eigensolver_section, n_repetition=n_rep)
    1054          10 :       DO i_rep = 1, n_rep
    1055             : 
    1056             :          ! parse section
    1057           8 :          CALL section_vals_val_get(eigensolver_section, "N", i_rep_section=i_rep, i_val=n)
    1058           8 :          CALL section_vals_val_get(eigensolver_section, "EIGENVALUES", i_rep_section=i_rep, i_val=neig)
    1059           8 :          CALL section_vals_val_get(eigensolver_section, "DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
    1060           8 :          CALL section_vals_val_get(eigensolver_section, "INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
    1061           8 :          CALL section_vals_val_get(eigensolver_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
    1062             : 
    1063             :          ! proper number of eigs
    1064           8 :          IF (neig < 0) neig = n
    1065           8 :          neig = MIN(neig, n)
    1066             : 
    1067             :          ! report
    1068           8 :          IF (iw > 0) THEN
    1069           4 :             WRITE (iw, *) "Matrix size", n
    1070           4 :             WRITE (iw, *) "Number of eigenvalues", neig
    1071           4 :             WRITE (iw, *) "Timing loops", n_loop
    1072           2 :             SELECT CASE (diag_method)
    1073             :             CASE (do_diag_syevd)
    1074           2 :                WRITE (iw, *) "Diag using syevd"
    1075             :             CASE (do_diag_syevx)
    1076           4 :                WRITE (iw, *) "Diag using syevx"
    1077             :             CASE DEFAULT
    1078             :                ! stop
    1079             :             END SELECT
    1080             : 
    1081           4 :             SELECT CASE (init_method)
    1082             :             CASE (do_mat_random)
    1083           4 :                WRITE (iw, *) "using random matrix"
    1084             :             CASE (do_mat_read)
    1085           4 :                WRITE (iw, *) "reading from file"
    1086             :             CASE DEFAULT
    1087             :                ! stop
    1088             :             END SELECT
    1089             :          END IF
    1090             : 
    1091             :          ! create matrix struct type
    1092           8 :          NULLIFY (fmstruct)
    1093             :          CALL cp_fm_struct_create(fmstruct=fmstruct, &
    1094             :                                   para_env=para_env, &
    1095             :                                   context=blacs_env, &
    1096             :                                   nrow_global=n, &
    1097           8 :                                   ncol_global=n)
    1098             : 
    1099             :          ! create all needed matrices, and buffers for the eigenvalues
    1100             :          CALL cp_fm_create(matrix=matrix, &
    1101             :                            matrix_struct=fmstruct, &
    1102           8 :                            name="MATRIX")
    1103           8 :          CALL cp_fm_set_all(matrix, 0.0_dp)
    1104             : 
    1105             :          CALL cp_fm_create(matrix=eigenvectors, &
    1106             :                            matrix_struct=fmstruct, &
    1107           8 :                            name="EIGENVECTORS")
    1108           8 :          CALL cp_fm_set_all(eigenvectors, 0.0_dp)
    1109             : 
    1110             :          CALL cp_fm_create(matrix=work, &
    1111             :                            matrix_struct=fmstruct, &
    1112           8 :                            name="WORK")
    1113           8 :          CALL cp_fm_set_all(matrix, 0.0_dp)
    1114             : 
    1115          24 :          ALLOCATE (eigenvalues(n))
    1116         100 :          eigenvalues = 0.0_dp
    1117          24 :          ALLOCATE (buffer(1, n))
    1118             : 
    1119             :          ! generate initial matrix, either by reading a file, or using random numbers
    1120           8 :          IF (para_env%is_source()) THEN
    1121           4 :             SELECT CASE (init_method)
    1122             :             CASE (do_mat_random)
    1123             :                rng_stream = rng_stream_type( &
    1124             :                             name="rng_stream", &
    1125             :                             distribution_type=UNIFORM, &
    1126           4 :                             extended_precision=.TRUE.)
    1127             :             CASE (do_mat_read)
    1128             :                CALL open_file(file_name="MATRIX", &
    1129             :                               file_action="READ", &
    1130             :                               file_form="FORMATTED", &
    1131             :                               file_status="OLD", &
    1132           4 :                               unit_number=unit_number)
    1133             :             END SELECT
    1134             :          END IF
    1135             : 
    1136         100 :          DO i = 1, n
    1137          92 :             IF (para_env%is_source()) THEN
    1138             :                SELECT CASE (init_method)
    1139             :                CASE (do_mat_random)
    1140         347 :                   DO j = i, n
    1141         347 :                      buffer(1, j) = rng_stream%next() - 0.5_dp
    1142             :                   END DO
    1143             :                   !MK activate/modify for a diagonal dominant symmetric matrix:
    1144             :                   !MK buffer(1,i) = 10.0_dp*buffer(1,i)
    1145             :                CASE (do_mat_read)
    1146          46 :                   READ (UNIT=unit_number, FMT=*) buffer(1, 1:n)
    1147             :                END SELECT
    1148             :             END IF
    1149          92 :             CALL para_env%bcast(buffer)
    1150           8 :             SELECT CASE (init_method)
    1151             :             CASE (do_mat_random)
    1152             :                CALL cp_fm_set_submatrix(fm=matrix, &
    1153             :                                         new_values=buffer, &
    1154             :                                         start_row=i, &
    1155             :                                         start_col=i, &
    1156             :                                         n_rows=1, &
    1157             :                                         n_cols=n - i + 1, &
    1158             :                                         alpha=1.0_dp, &
    1159             :                                         beta=0.0_dp, &
    1160          92 :                                         transpose=.FALSE.)
    1161             :                CALL cp_fm_set_submatrix(fm=matrix, &
    1162             :                                         new_values=buffer, &
    1163             :                                         start_row=i, &
    1164             :                                         start_col=i, &
    1165             :                                         n_rows=n - i + 1, &
    1166             :                                         n_cols=1, &
    1167             :                                         alpha=1.0_dp, &
    1168             :                                         beta=0.0_dp, &
    1169          92 :                                         transpose=.TRUE.)
    1170             :             CASE (do_mat_read)
    1171             :                CALL cp_fm_set_submatrix(fm=matrix, &
    1172             :                                         new_values=buffer, &
    1173             :                                         start_row=i, &
    1174             :                                         start_col=1, &
    1175             :                                         n_rows=1, &
    1176             :                                         n_cols=n, &
    1177             :                                         alpha=1.0_dp, &
    1178             :                                         beta=0.0_dp, &
    1179          92 :                                         transpose=.FALSE.)
    1180             :             END SELECT
    1181             :          END DO
    1182             : 
    1183           8 :          DEALLOCATE (buffer)
    1184             : 
    1185           8 :          IF (para_env%is_source()) THEN
    1186           0 :             SELECT CASE (init_method)
    1187             :             CASE (do_mat_read)
    1188           4 :                CALL close_file(unit_number=unit_number)
    1189             :             END SELECT
    1190             :          END IF
    1191             : 
    1192          88 :          DO i_loop = 1, n_loop
    1193        1000 :             eigenvalues = 0.0_dp
    1194          80 :             CALL cp_fm_set_all(eigenvectors, 0.0_dp)
    1195             :             CALL cp_fm_to_fm(source=matrix, &
    1196          80 :                              destination=work)
    1197             : 
    1198             :             ! DONE, now testing
    1199          80 :             t1 = m_walltime()
    1200          40 :             SELECT CASE (diag_method)
    1201             :             CASE (do_diag_syevd)
    1202             :                CALL cp_fm_syevd(matrix=work, &
    1203             :                                 eigenvectors=eigenvectors, &
    1204          40 :                                 eigenvalues=eigenvalues)
    1205             :             CASE (do_diag_syevx)
    1206             :                CALL cp_fm_syevx(matrix=work, &
    1207             :                                 eigenvectors=eigenvectors, &
    1208             :                                 eigenvalues=eigenvalues, &
    1209             :                                 neig=neig, &
    1210          80 :                                 work_syevx=1.0_dp)
    1211             :             END SELECT
    1212          80 :             t2 = m_walltime()
    1213          88 :             IF (iw > 0) WRITE (iw, *) "Timing for loop ", i_loop, " : ", t2 - t1
    1214             :          END DO
    1215             : 
    1216           8 :          IF (iw > 0) THEN
    1217           4 :             WRITE (iw, *) "Eigenvalues: "
    1218           4 :             WRITE (UNIT=iw, FMT="(T3,5F14.6)") eigenvalues(1:neig)
    1219          42 :             WRITE (UNIT=iw, FMT="(T3,A4,F16.6)") "Sum:", SUM(eigenvalues(1:neig))
    1220           4 :             WRITE (iw, *) ""
    1221             :          END IF
    1222             : 
    1223             :          ! Clean up
    1224           8 :          DEALLOCATE (eigenvalues)
    1225           8 :          CALL cp_fm_release(matrix=work)
    1226           8 :          CALL cp_fm_release(matrix=eigenvectors)
    1227           8 :          CALL cp_fm_release(matrix=matrix)
    1228          18 :          CALL cp_fm_struct_release(fmstruct=fmstruct)
    1229             : 
    1230             :       END DO
    1231             : 
    1232           2 :       CALL cp_blacs_env_release(blacs_env=blacs_env)
    1233             : 
    1234           4 :    END SUBROUTINE eigensolver_test
    1235             : 
    1236             : ! **************************************************************************************************
    1237             : !> \brief Tests the parallel matrix multiply
    1238             : !> \param para_env ...
    1239             : !> \param iw ...
    1240             : !> \param cp_fm_gemm_test_section ...
    1241             : ! **************************************************************************************************
    1242           4 :    SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
    1243             : 
    1244             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1245             :       INTEGER                                            :: iw
    1246             :       TYPE(section_vals_type), POINTER                   :: cp_fm_gemm_test_section
    1247             : 
    1248             :       CHARACTER(LEN=1)                                   :: transa, transb
    1249             :       INTEGER :: i_loop, i_rep, k, m, n, N_loop, n_rep, ncol_block, ncol_block_actual, &
    1250             :          ncol_global, np, nrow_block, nrow_block_actual, nrow_global
    1251           4 :       INTEGER, DIMENSION(:), POINTER                     :: grid_2d
    1252             :       LOGICAL                                            :: force_blocksize, row_major, transa_p, &
    1253             :                                                             transb_p
    1254             :       REAL(KIND=dp)                                      :: t1, t2, t3, t4
    1255             :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1256             :       TYPE(cp_fm_struct_type), POINTER                   :: fmstruct_a, fmstruct_b, fmstruct_c
    1257             :       TYPE(cp_fm_type)                                   :: matrix_a, matrix_b, matrix_c
    1258             : 
    1259           4 :       CALL section_vals_get(cp_fm_gemm_test_section, n_repetition=n_rep)
    1260          24 :       DO i_rep = 1, n_rep
    1261             : 
    1262             :          ! how often should we do the multiply
    1263          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
    1264             : 
    1265             :          ! matrices def.
    1266          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "K", i_rep_section=i_rep, i_val=k)
    1267          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "N", i_rep_section=i_rep, i_val=n)
    1268          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "M", i_rep_section=i_rep, i_val=m)
    1269          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "transa", i_rep_section=i_rep, l_val=transa_p)
    1270          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "transb", i_rep_section=i_rep, l_val=transb_p)
    1271          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "nrow_block", i_rep_section=i_rep, i_val=nrow_block)
    1272          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "ncol_block", i_rep_section=i_rep, i_val=ncol_block)
    1273          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
    1274          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
    1275          20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
    1276          20 :          transa = "N"
    1277          20 :          transb = "N"
    1278          20 :          IF (transa_p) transa = "T"
    1279          20 :          IF (transb_p) transb = "T"
    1280             : 
    1281          20 :          IF (iw > 0) THEN
    1282          10 :             WRITE (iw, '(T2,A)') "----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
    1283          10 :             WRITE (iw, '(T2,A)', ADVANCE="NO") "C = "
    1284          10 :             IF (transa_p) THEN
    1285           2 :                WRITE (iw, '(A)', ADVANCE="NO") "TRANSPOSE(A) x"
    1286             :             ELSE
    1287           8 :                WRITE (iw, '(A)', ADVANCE="NO") "A x "
    1288             :             END IF
    1289          10 :             IF (transb_p) THEN
    1290           2 :                WRITE (iw, '(A)') "TRANSPOSE(B) "
    1291             :             ELSE
    1292           8 :                WRITE (iw, '(A)') "B "
    1293             :             END IF
    1294          10 :             WRITE (iw, '(T2,A,T50,I5,A,I5)') 'requested block size', nrow_block, ' by ', ncol_block
    1295          10 :             WRITE (iw, '(T2,A,T50,I5)') 'number of repetitions of cp_fm_gemm ', n_loop
    1296          10 :             WRITE (iw, '(T2,A,T50,L5)') 'Row Major', row_major
    1297          30 :             WRITE (iw, '(T2,A,T50,2I7)') 'GRID_2D ', grid_2d
    1298          10 :             WRITE (iw, '(T2,A,T50,L5)') 'Force blocksize ', force_blocksize
    1299             :             ! check the return value of pilaenv, too small values limit the performance (assuming pdgemm is the vanilla variant)
    1300          10 :             np = cp_fm_pilaenv(0, 'D')
    1301          10 :             IF (np > 0) THEN
    1302          10 :                WRITE (iw, '(T2,A,T50,I5)') 'PILAENV blocksize', np
    1303             :             END IF
    1304             :          END IF
    1305             : 
    1306          20 :          NULLIFY (blacs_env)
    1307             :          CALL cp_blacs_env_create(blacs_env=blacs_env, &
    1308             :                                   para_env=para_env, &
    1309             :                                   row_major=row_major, &
    1310          20 :                                   grid_2d=grid_2d)
    1311             : 
    1312          20 :          NULLIFY (fmstruct_a)
    1313          20 :          IF (transa_p) THEN
    1314           4 :             nrow_global = m; ncol_global = k
    1315             :          ELSE
    1316          16 :             nrow_global = k; ncol_global = m
    1317             :          END IF
    1318             :          CALL cp_fm_struct_create(fmstruct=fmstruct_a, para_env=para_env, context=blacs_env, &
    1319             :                                   nrow_global=nrow_global, ncol_global=ncol_global, &
    1320          20 :                                   nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
    1321          20 :          CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
    1322          30 :          IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix A ', nrow_global, " by ", ncol_global, &
    1323          20 :             ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
    1324             : 
    1325          20 :          IF (transb_p) THEN
    1326           4 :             nrow_global = n; ncol_global = m
    1327             :          ELSE
    1328          16 :             nrow_global = m; ncol_global = n
    1329             :          END IF
    1330          20 :          NULLIFY (fmstruct_b)
    1331             :          CALL cp_fm_struct_create(fmstruct=fmstruct_b, para_env=para_env, context=blacs_env, &
    1332             :                                   nrow_global=nrow_global, ncol_global=ncol_global, &
    1333          20 :                                   nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
    1334          20 :          CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
    1335          30 :          IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix B ', nrow_global, " by ", ncol_global, &
    1336          20 :             ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
    1337             : 
    1338          20 :          NULLIFY (fmstruct_c)
    1339          20 :          nrow_global = k
    1340          20 :          ncol_global = n
    1341             :          CALL cp_fm_struct_create(fmstruct=fmstruct_c, para_env=para_env, context=blacs_env, &
    1342             :                                   nrow_global=nrow_global, ncol_global=ncol_global, &
    1343          20 :                                   nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
    1344          20 :          CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
    1345          30 :          IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix C ', nrow_global, " by ", ncol_global, &
    1346          20 :             ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
    1347             : 
    1348          20 :          CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name="MATRIX A")
    1349          20 :          CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name="MATRIX B")
    1350          20 :          CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name="MATRIX C")
    1351             : 
    1352          20 :          CALL RANDOM_NUMBER(matrix_a%local_data)
    1353          20 :          CALL RANDOM_NUMBER(matrix_b%local_data)
    1354          20 :          CALL RANDOM_NUMBER(matrix_c%local_data)
    1355             : 
    1356          20 :          IF (iw > 0) CALL m_flush(iw)
    1357             : 
    1358          20 :          t1 = m_walltime()
    1359         932 :          DO i_loop = 1, N_loop
    1360         912 :             t3 = m_walltime()
    1361         912 :             CALL parallel_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
    1362         912 :             t4 = m_walltime()
    1363         932 :             IF (iw > 0) THEN
    1364         456 :                WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm timing: ", (t4 - t3)
    1365         456 :                CALL m_flush(iw)
    1366             :             END IF
    1367             :          END DO
    1368          20 :          t2 = m_walltime()
    1369             : 
    1370          20 :          IF (iw > 0) THEN
    1371          10 :             WRITE (iw, '(T2,A,T50,F12.6)') "average cp_fm_gemm timing: ", (t2 - t1)/N_loop
    1372          10 :             IF (t2 > t1) THEN
    1373          10 :                WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm Gflops per MPI task: ", &
    1374          20 :                   2*REAL(m, kind=dp)*REAL(n, kind=dp)*REAL(k, kind=dp)*N_loop/MAX(0.001_dp, t2 - t1)/1.0E9_dp/para_env%num_pe
    1375             :             END IF
    1376             :          END IF
    1377             : 
    1378          20 :          CALL cp_fm_release(matrix=matrix_a)
    1379          20 :          CALL cp_fm_release(matrix=matrix_b)
    1380          20 :          CALL cp_fm_release(matrix=matrix_c)
    1381          20 :          CALL cp_fm_struct_release(fmstruct=fmstruct_a)
    1382          20 :          CALL cp_fm_struct_release(fmstruct=fmstruct_b)
    1383          20 :          CALL cp_fm_struct_release(fmstruct=fmstruct_c)
    1384         124 :          CALL cp_blacs_env_release(blacs_env=blacs_env)
    1385             : 
    1386             :       END DO
    1387             : 
    1388           4 :    END SUBROUTINE cp_fm_gemm_test
    1389             : 
    1390             : ! **************************************************************************************************
    1391             : !> \brief Tests the DBCSR interface.
    1392             : !> \param para_env ...
    1393             : !> \param iw ...
    1394             : !> \param input_section ...
    1395             : ! **************************************************************************************************
    1396          32 :    SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
    1397             : 
    1398             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1399             :       INTEGER                                            :: iw
    1400             :       TYPE(section_vals_type), POINTER                   :: input_section
    1401             : 
    1402             :       CHARACTER, DIMENSION(3)                            :: types
    1403             :       INTEGER                                            :: data_type, i_rep, k, m, n, N_loop, &
    1404             :                                                             n_rep, test_type
    1405          32 :       INTEGER, DIMENSION(:), POINTER                     :: bs_k, bs_m, bs_n, nproc
    1406             :       LOGICAL                                            :: always_checksum, retain_sparsity, &
    1407             :                                                             transa_p, transb_p
    1408             :       REAL(KIND=dp)                                      :: alpha, beta, filter_eps, s_a, s_b, s_c
    1409             : 
    1410             : !   ---------------------------------------------------------------------------
    1411             : 
    1412          32 :       NULLIFY (bs_m, bs_n, bs_k)
    1413          32 :       CALL section_vals_get(input_section, n_repetition=n_rep)
    1414          32 :       CALL dbcsr_reset_randmat_seed()
    1415          78 :       DO i_rep = 1, n_rep
    1416             :          ! how often should we do the multiply
    1417          46 :          CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
    1418             : 
    1419             :          ! matrices def.
    1420          46 :          CALL section_vals_val_get(input_section, "TEST_TYPE", i_rep_section=i_rep, i_val=test_type)
    1421          46 :          CALL section_vals_val_get(input_section, "DATA_TYPE", i_rep_section=i_rep, i_val=data_type)
    1422          46 :          CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
    1423          46 :          CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
    1424          46 :          CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
    1425          46 :          CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
    1426          46 :          CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
    1427             :          CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, &
    1428          46 :                                    i_vals=bs_m)
    1429             :          CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, &
    1430          46 :                                    i_vals=bs_n)
    1431             :          CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, &
    1432          46 :                                    i_vals=bs_k)
    1433          46 :          CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
    1434          46 :          CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
    1435          46 :          CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
    1436          46 :          CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
    1437          46 :          CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
    1438          46 :          CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
    1439             :          CALL section_vals_val_get(input_section, "nproc", i_rep_section=i_rep, &
    1440          46 :                                    i_vals=nproc)
    1441             :          CALL section_vals_val_get(input_section, "atype", i_rep_section=i_rep, &
    1442          46 :                                    c_val=types(1))
    1443             :          CALL section_vals_val_get(input_section, "btype", i_rep_section=i_rep, &
    1444          46 :                                    c_val=types(2))
    1445             :          CALL section_vals_val_get(input_section, "ctype", i_rep_section=i_rep, &
    1446          46 :                                    c_val=types(3))
    1447             :          CALL section_vals_val_get(input_section, "filter_eps", &
    1448          46 :                                    i_rep_section=i_rep, r_val=filter_eps)
    1449          46 :          CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
    1450             : 
    1451             :          CALL dbcsr_run_tests(para_env%get_handle(), iw, nproc, &
    1452             :                               (/m, n, k/), &
    1453             :                               (/transa_p, transb_p/), &
    1454             :                               bs_m, bs_n, bs_k, &
    1455             :                               (/s_a, s_b, s_c/), &
    1456             :                               alpha, beta, &
    1457             :                               data_type=data_type, &
    1458             :                               test_type=test_type, &
    1459             :                               n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
    1460         538 :                               always_checksum=always_checksum)
    1461             :       END DO
    1462          32 :    END SUBROUTINE cp_dbcsr_tests
    1463             : 
    1464             : ! **************************************************************************************************
    1465             : !> \brief Tests the DBM library.
    1466             : !> \param para_env ...
    1467             : !> \param iw ...
    1468             : !> \param input_section ...
    1469             : ! **************************************************************************************************
    1470          14 :    SUBROUTINE run_dbm_tests(para_env, iw, input_section)
    1471             : 
    1472             :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1473             :       INTEGER                                            :: iw
    1474             :       TYPE(section_vals_type), POINTER                   :: input_section
    1475             : 
    1476             :       INTEGER                                            :: i_rep, k, m, n, N_loop, n_rep
    1477          14 :       INTEGER, DIMENSION(:), POINTER                     :: bs_k, bs_m, bs_n
    1478             :       LOGICAL                                            :: always_checksum, retain_sparsity, &
    1479             :                                                             transa_p, transb_p
    1480             :       REAL(KIND=dp)                                      :: alpha, beta, filter_eps, s_a, s_b, s_c
    1481             : 
    1482             : !   ---------------------------------------------------------------------------
    1483             : 
    1484          14 :       NULLIFY (bs_m, bs_n, bs_k)
    1485          14 :       CALL section_vals_get(input_section, n_repetition=n_rep)
    1486          14 :       CALL dbcsr_reset_randmat_seed()
    1487          28 :       DO i_rep = 1, n_rep
    1488          14 :          CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
    1489          14 :          CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
    1490          14 :          CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
    1491          14 :          CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
    1492          14 :          CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
    1493          14 :          CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
    1494          14 :          CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, i_vals=bs_m)
    1495          14 :          CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, i_vals=bs_n)
    1496          14 :          CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, i_vals=bs_k)
    1497          14 :          CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
    1498          14 :          CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
    1499          14 :          CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
    1500          14 :          CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
    1501          14 :          CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
    1502          14 :          CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
    1503          14 :          CALL section_vals_val_get(input_section, "filter_eps", i_rep_section=i_rep, r_val=filter_eps)
    1504          14 :          CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
    1505             : 
    1506             :          CALL dbm_run_tests(mp_group=para_env, &
    1507             :                             io_unit=iw, &
    1508             :                             matrix_sizes=(/m, n, k/), &
    1509             :                             trs=(/transa_p, transb_p/), &
    1510             :                             bs_m=bs_m, &
    1511             :                             bs_n=bs_n, &
    1512             :                             bs_k=bs_k, &
    1513             :                             sparsities=(/s_a, s_b, s_c/), &
    1514             :                             alpha=alpha, &
    1515             :                             beta=beta, &
    1516             :                             n_loops=n_loop, &
    1517             :                             eps=filter_eps, &
    1518             :                             retain_sparsity=retain_sparsity, &
    1519         154 :                             always_checksum=always_checksum)
    1520             :       END DO
    1521          14 :    END SUBROUTINE run_dbm_tests
    1522             : 
    1523        8484 : END MODULE library_tests

Generated by: LCOV version 1.15