LCOV - code coverage report
Current view: top level - src - library_tests.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 88.8 % 703 624
Test Date: 2025-12-04 06:27:48 Functions: 100.0 % 11 11

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Performance tests for basic tasks like matrix multiplies, copy, fft.
      10              : !> \par History
      11              : !>      30-Nov-2000 (JGH) added input
      12              : !>      02-Jan-2001 (JGH) Parallel FFT
      13              : !>      28-Feb-2002 (JGH) Clebsch-Gordon Coefficients
      14              : !>      06-Jun-2003 (JGH) Real space grid test
      15              : !>      Eigensolver test (29.08.05,MK)
      16              : !> \author JGH  6-NOV-2000
      17              : ! **************************************************************************************************
      18              : MODULE library_tests
      19              : 
      20              :    USE ai_coulomb_test,                 ONLY: eri_test
      21              :    USE cell_methods,                    ONLY: cell_create,&
      22              :                                               init_cell
      23              :    USE cell_types,                      ONLY: cell_release,&
      24              :                                               cell_type
      25              :    USE cg_test,                         ONLY: clebsch_gordon_test
      26              :    USE cp_blacs_env,                    ONLY: cp_blacs_env_create,&
      27              :                                               cp_blacs_env_release,&
      28              :                                               cp_blacs_env_type
      29              :    USE cp_dbcsr_api,                    ONLY: dbcsr_reset_randmat_seed,&
      30              :                                               dbcsr_run_tests
      31              :    USE cp_eri_mme_interface,            ONLY: cp_eri_mme_perf_acc_test
      32              :    USE cp_files,                        ONLY: close_file,&
      33              :                                               open_file
      34              :    USE cp_fm_basic_linalg,              ONLY: cp_fm_gemm
      35              :    USE cp_fm_diag,                      ONLY: cp_fm_syevd,&
      36              :                                               cp_fm_syevx
      37              :    USE cp_fm_struct,                    ONLY: cp_fm_struct_create,&
      38              :                                               cp_fm_struct_get,&
      39              :                                               cp_fm_struct_release,&
      40              :                                               cp_fm_struct_type
      41              :    USE cp_fm_types,                     ONLY: cp_fm_create,&
      42              :                                               cp_fm_pilaenv,&
      43              :                                               cp_fm_release,&
      44              :                                               cp_fm_set_all,&
      45              :                                               cp_fm_set_submatrix,&
      46              :                                               cp_fm_to_fm,&
      47              :                                               cp_fm_type
      48              :    USE cp_log_handling,                 ONLY: cp_get_default_logger,&
      49              :                                               cp_logger_type
      50              :    USE cp_output_handling,              ONLY: cp_print_key_finished_output,&
      51              :                                               cp_print_key_unit_nr
      52              :    USE cp_realspace_grid_init,          ONLY: init_input_type
      53              :    USE dbm_tests,                       ONLY: dbm_run_tests
      54              :    USE fft_tools,                       ONLY: BWFFT,&
      55              :                                               FFT_RADIX_CLOSEST,&
      56              :                                               FWFFT,&
      57              :                                               fft3d,&
      58              :                                               fft_radix_operations,&
      59              :                                               finalize_fft,&
      60              :                                               init_fft
      61              :    USE global_types,                    ONLY: global_environment_type
      62              :    USE input_constants,                 ONLY: do_diag_syevd,&
      63              :                                               do_diag_syevx,&
      64              :                                               do_mat_random,&
      65              :                                               do_mat_read,&
      66              :                                               do_pwgrid_ns_fullspace,&
      67              :                                               do_pwgrid_ns_halfspace,&
      68              :                                               do_pwgrid_spherical
      69              :    USE input_section_types,             ONLY: section_vals_get,&
      70              :                                               section_vals_get_subs_vals,&
      71              :                                               section_vals_type,&
      72              :                                               section_vals_val_get
      73              :    USE kinds,                           ONLY: dp
      74              :    USE machine,                         ONLY: m_flush,&
      75              :                                               m_walltime
      76              :    USE mathconstants,                   ONLY: gaussi
      77              :    USE message_passing,                 ONLY: mp_para_env_type
      78              :    USE minimax_exp,                     ONLY: validate_exp_minimax
      79              :    USE mp2_grids,                       ONLY: test_least_square_ft
      80              :    USE mp_perf_test,                    ONLY: mpi_perf_test
      81              :    USE parallel_gemm_api,               ONLY: parallel_gemm
      82              :    USE parallel_rng_types,              ONLY: UNIFORM,&
      83              :                                               rng_stream_type
      84              :    USE pw_grid_types,                   ONLY: FULLSPACE,&
      85              :                                               HALFSPACE,&
      86              :                                               pw_grid_type
      87              :    USE pw_grids,                        ONLY: pw_grid_create,&
      88              :                                               pw_grid_release
      89              :    USE pw_methods,                      ONLY: pw_transfer,&
      90              :                                               pw_zero
      91              :    USE pw_types,                        ONLY: pw_c1d_gs_type,&
      92              :                                               pw_c3d_rs_type,&
      93              :                                               pw_r3d_rs_type
      94              :    USE realspace_grid_types,            ONLY: &
      95              :         realspace_grid_desc_type, realspace_grid_input_type, realspace_grid_type, rs_grid_create, &
      96              :         rs_grid_create_descriptor, rs_grid_print, rs_grid_release, rs_grid_release_descriptor, &
      97              :         rs_grid_zero, transfer_pw2rs, transfer_rs2pw
      98              :    USE shg_integrals_test,              ONLY: shg_integrals_perf_acc_test
      99              : #include "./base/base_uses.f90"
     100              : 
     101              :    IMPLICIT NONE
     102              : 
     103              :    PRIVATE
     104              :    PUBLIC :: lib_test
     105              : 
     106              :    INTEGER                  :: runtest(100)
     107              :    REAL(KIND=dp)           :: max_memory
     108              :    REAL(KIND=dp), PARAMETER :: threshold = 1.0E-8_dp
     109              :    CHARACTER(len=*), PARAMETER, PRIVATE :: moduleN = 'library_tests'
     110              : 
     111              : CONTAINS
     112              : 
     113              : ! **************************************************************************************************
     114              : !> \brief Master routine for tests
     115              : !> \param root_section ...
     116              : !> \param para_env ...
     117              : !> \param globenv ...
     118              : !> \par History
     119              : !>      none
     120              : !> \author JGH  6-NOV-2000
     121              : ! **************************************************************************************************
     122          720 :    SUBROUTINE lib_test(root_section, para_env, globenv)
     123              : 
     124              :       TYPE(section_vals_type), POINTER                   :: root_section
     125              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     126              :       TYPE(global_environment_type), POINTER             :: globenv
     127              : 
     128              :       CHARACTER(LEN=*), PARAMETER                        :: routineN = 'lib_test'
     129              : 
     130              :       INTEGER                                            :: handle, iw
     131              :       LOGICAL                                            :: explicit
     132              :       TYPE(cp_logger_type), POINTER                      :: logger
     133              :       TYPE(section_vals_type), POINTER :: cp_dbcsr_test_section, cp_fm_gemm_test_section, &
     134              :          dbm_test_section, eigensolver_section, eri_mme_test_section, pw_transfer_section, &
     135              :          rs_pw_transfer_section, shg_integrals_test_section
     136              : 
     137           80 :       CALL timeset(routineN, handle)
     138              : 
     139           80 :       logger => cp_get_default_logger()
     140           80 :       iw = cp_print_key_unit_nr(logger, root_section, "TEST%PROGRAM_RUN_INFO", extension=".log")
     141              : 
     142           80 :       IF (iw > 0) THEN
     143           40 :          WRITE (iw, '(T2,79("*"))')
     144           40 :          WRITE (iw, '(A,T31,A,T80,A)') ' *', ' PERFORMANCE TESTS ', '*'
     145           40 :          WRITE (iw, '(T2,79("*"))')
     146              :       END IF
     147              :       !
     148           80 :       CALL test_input(root_section, para_env)
     149              :       !
     150           80 :       IF (runtest(1) /= 0) CALL copy_test(para_env, iw)
     151              :       !
     152           80 :       IF (runtest(2) /= 0) CALL matmul_test(para_env, test_matmul=.TRUE., test_dgemm=.FALSE., iw=iw)
     153           80 :       IF (runtest(5) /= 0) CALL matmul_test(para_env, test_matmul=.FALSE., test_dgemm=.TRUE., iw=iw)
     154              :       !
     155           80 :       IF (runtest(3) /= 0) CALL fft_test(para_env, iw, globenv%fftw_plan_type, &
     156            2 :                                          globenv%fftw_wisdom_file_name)
     157              :       !
     158           80 :       IF (runtest(4) /= 0) CALL eri_test(iw)
     159              :       !
     160           80 :       IF (runtest(6) /= 0) CALL clebsch_gordon_test()
     161              :       !
     162              :       ! runtest 7 has been deleted and can be recycled
     163              :       !
     164           80 :       IF (runtest(8) /= 0) CALL mpi_perf_test(para_env, runtest(8), iw)
     165              :       !
     166           80 :       IF (runtest(10) /= 0) CALL validate_exp_minimax(runtest(10), iw)
     167              :       !
     168           80 :       IF (runtest(11) /= 0) CALL test_least_square_ft(runtest(11), iw)
     169              :       !
     170              : 
     171           80 :       rs_pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%RS_PW_TRANSFER")
     172           80 :       CALL section_vals_get(rs_pw_transfer_section, explicit=explicit)
     173           80 :       IF (explicit) THEN
     174            2 :          CALL rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
     175              :       END IF
     176              : 
     177           80 :       pw_transfer_section => section_vals_get_subs_vals(root_section, "TEST%PW_TRANSFER")
     178           80 :       CALL section_vals_get(pw_transfer_section, explicit=explicit)
     179           80 :       IF (explicit) THEN
     180           10 :          CALL pw_fft_test(para_env, iw, globenv, pw_transfer_section)
     181              :       END IF
     182              : 
     183           80 :       cp_fm_gemm_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_FM_GEMM")
     184           80 :       CALL section_vals_get(cp_fm_gemm_test_section, explicit=explicit)
     185           80 :       IF (explicit) THEN
     186            4 :          CALL cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
     187              :       END IF
     188              : 
     189           80 :       eigensolver_section => section_vals_get_subs_vals(root_section, "TEST%EIGENSOLVER")
     190           80 :       CALL section_vals_get(eigensolver_section, explicit=explicit)
     191           80 :       IF (explicit) THEN
     192            2 :          CALL eigensolver_test(para_env, iw, eigensolver_section)
     193              :       END IF
     194              : 
     195           80 :       eri_mme_test_section => section_vals_get_subs_vals(root_section, "TEST%ERI_MME_TEST")
     196           80 :       CALL section_vals_get(eri_mme_test_section, explicit=explicit)
     197           80 :       IF (explicit) THEN
     198            8 :          CALL cp_eri_mme_perf_acc_test(para_env, iw, eri_mme_test_section)
     199              :       END IF
     200              : 
     201           80 :       shg_integrals_test_section => section_vals_get_subs_vals(root_section, "TEST%SHG_INTEGRALS_TEST")
     202           80 :       CALL section_vals_get(shg_integrals_test_section, explicit=explicit)
     203           80 :       IF (explicit) THEN
     204            4 :          CALL shg_integrals_perf_acc_test(iw, shg_integrals_test_section)
     205              :       END IF
     206              : 
     207              :       ! DBCSR tests
     208           80 :       cp_dbcsr_test_section => section_vals_get_subs_vals(root_section, "TEST%CP_DBCSR")
     209           80 :       CALL section_vals_get(cp_dbcsr_test_section, explicit=explicit)
     210           80 :       IF (explicit) THEN
     211           32 :          CALL cp_dbcsr_tests(para_env, iw, cp_dbcsr_test_section)
     212              :       END IF
     213              : 
     214              :       ! DBM tests
     215           80 :       dbm_test_section => section_vals_get_subs_vals(root_section, "TEST%DBM")
     216           80 :       CALL section_vals_get(dbm_test_section, explicit=explicit)
     217           80 :       IF (explicit) THEN
     218           14 :          CALL run_dbm_tests(para_env, iw, dbm_test_section)
     219              :       END IF
     220              : 
     221           80 :       CALL cp_print_key_finished_output(iw, logger, root_section, "TEST%PROGRAM_RUN_INFO")
     222              : 
     223           80 :       CALL timestop(handle)
     224              : 
     225           80 :    END SUBROUTINE lib_test
     226              : 
     227              : ! **************************************************************************************************
     228              : !> \brief Reads input section &TEST ... &END
     229              : !> \param root_section ...
     230              : !> \param para_env ...
     231              : !> \author JGH 30-NOV-2000
     232              : !> \note
     233              : !> I---------------------------------------------------------------------------I
     234              : !> I SECTION: &TEST ... &END                                                   I
     235              : !> I                                                                           I
     236              : !> I    MEMORY   max_memory                                                    I
     237              : !> I    COPY     n                                                             I
     238              : !> I    MATMUL   n                                                             I
     239              : !> I    FFT      n                                                             I
     240              : !> I    ERI      n                                                             I
     241              : !> I    PW_FFT   n                                                             I
     242              : !> I    Clebsch-Gordon n                                                       I
     243              : !> I    RS_GRIDS n                                                             I
     244              : !> I    MPI      n                                                             I
     245              : !> I    RNG      n             -> Parallel random number generator             I
     246              : !> I---------------------------------------------------------------------------I
     247              : ! **************************************************************************************************
     248          160 :    SUBROUTINE test_input(root_section, para_env)
     249              :       TYPE(section_vals_type), POINTER                   :: root_section
     250              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     251              : 
     252              :       TYPE(section_vals_type), POINTER                   :: test_section
     253              : 
     254              : !
     255              : !..defaults
     256              : ! using this style is not recommended, introduce sections instead (see e.g. cp_fm_gemm)
     257              : 
     258           80 :       runtest = 0
     259           80 :       test_section => section_vals_get_subs_vals(root_section, "TEST")
     260           80 :       CALL section_vals_val_get(test_section, "MEMORY", r_val=max_memory)
     261           80 :       CALL section_vals_val_get(test_section, 'COPY', i_val=runtest(1))
     262           80 :       CALL section_vals_val_get(test_section, 'MATMUL', i_val=runtest(2))
     263           80 :       CALL section_vals_val_get(test_section, 'DGEMM', i_val=runtest(5))
     264           80 :       CALL section_vals_val_get(test_section, 'FFT', i_val=runtest(3))
     265           80 :       CALL section_vals_val_get(test_section, 'ERI', i_val=runtest(4))
     266           80 :       CALL section_vals_val_get(test_section, 'CLEBSCH_GORDON', i_val=runtest(6))
     267           80 :       CALL section_vals_val_get(test_section, 'MPI', i_val=runtest(8))
     268           80 :       CALL section_vals_val_get(test_section, 'MINIMAX', i_val=runtest(10))
     269           80 :       CALL section_vals_val_get(test_section, 'LEAST_SQ_FT', i_val=runtest(11))
     270              : 
     271           80 :       CALL para_env%sync()
     272           80 :    END SUBROUTINE test_input
     273              : 
     274              : ! **************************************************************************************************
     275              : !> \brief Tests the performance to copy two vectors.
     276              : !> \param para_env ...
     277              : !> \param iw ...
     278              : !> \par History
     279              : !>      none
     280              : !> \author JGH  6-NOV-2000
     281              : !> \note
     282              : !>      The results of these tests allow to determine the size of the cache
     283              : !>      of the CPU. This can be used to optimize the performance of the
     284              : !>      FFTSG library.
     285              : ! **************************************************************************************************
     286            2 :    SUBROUTINE copy_test(para_env, iw)
     287              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     288              :       INTEGER                                            :: iw
     289              : 
     290              :       INTEGER                                            :: i, ierr, j, len, ntim, siz
     291              :       REAL(KIND=dp)                                      :: perf, t, tend, tstart
     292            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: ca, cb
     293              : 
     294              : ! test for copy --> Cache size
     295              : 
     296            2 :       siz = ABS(runtest(1))
     297            2 :       IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of copy ( F95 ) "
     298           34 :       DO i = 6, 24
     299           34 :          len = 2**i
     300           34 :          IF (8.0_dp*REAL(len, KIND=dp) > max_memory*0.5_dp) EXIT
     301           96 :          ALLOCATE (ca(len), STAT=ierr)
     302           32 :          IF (ierr /= 0) EXIT
     303           64 :          ALLOCATE (cb(len), STAT=ierr)
     304           32 :          IF (ierr /= 0) EXIT
     305              : 
     306           32 :          CALL RANDOM_NUMBER(ca)
     307           32 :          ntim = NINT(1.e7_dp/REAL(len, KIND=dp))
     308           32 :          ntim = MAX(ntim, 1)
     309           32 :          ntim = MIN(ntim, siz*10000)
     310              : 
     311           32 :          tstart = m_walltime()
     312       512524 :          DO j = 1, ntim
     313    315058412 :             cb(:) = ca(:)
     314       512524 :             ca(1) = REAL(j, KIND=dp)
     315              :          END DO
     316           32 :          tend = m_walltime()
     317           32 :          t = tend - tstart + threshold
     318           32 :          IF (t > 0.0_dp) THEN
     319           32 :             perf = REAL(ntim, KIND=dp)*REAL(len, KIND=dp)*1.e-6_dp/t
     320              :          ELSE
     321            0 :             perf = 0.0_dp
     322              :          END IF
     323              : 
     324           32 :          IF (para_env%is_source()) THEN
     325           16 :             WRITE (iw, '(A,i2,i10,A,T59,F14.4,A)') " Copy test:   Size = 2^", i, &
     326           32 :                len/1024, " Kwords", perf, " Mcopy/s"
     327              :          END IF
     328              : 
     329           32 :          DEALLOCATE (ca)
     330           34 :          DEALLOCATE (cb)
     331              :       END DO
     332            2 :       CALL para_env%sync()
     333            2 :    END SUBROUTINE copy_test
     334              : 
     335              : ! **************************************************************************************************
     336              : !> \brief Tests the performance of different kinds of matrix matrix multiply
     337              : !>      kernels for the BLAS and F95 intrinsic matmul.
     338              : !> \param para_env ...
     339              : !> \param test_matmul ...
     340              : !> \param test_dgemm ...
     341              : !> \param iw ...
     342              : !> \par History
     343              : !>      none
     344              : !> \author JGH  6-NOV-2000
     345              : ! **************************************************************************************************
     346            2 :    SUBROUTINE matmul_test(para_env, test_matmul, test_dgemm, iw)
     347              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     348              :       LOGICAL                                            :: test_matmul, test_dgemm
     349              :       INTEGER                                            :: iw
     350              : 
     351              :       INTEGER                                            :: i, ierr, j, len, ntim, siz
     352              :       REAL(KIND=dp)                                      :: perf, t, tend, tstart, xdum
     353            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: ma, mb, mc
     354              : 
     355              : ! test for matrix multpies
     356              : 
     357            2 :       IF (test_matmul) THEN
     358            2 :          siz = ABS(runtest(2))
     359            2 :          IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( F95 ) "
     360            2 :          DO i = 5, siz, 2
     361            4 :             len = 2**i + 1
     362            4 :             IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
     363           16 :             ALLOCATE (ma(len, len), STAT=ierr)
     364            4 :             IF (ierr /= 0) EXIT
     365           12 :             ALLOCATE (mb(len, len), STAT=ierr)
     366            4 :             IF (ierr /= 0) EXIT
     367           12 :             ALLOCATE (mc(len, len), STAT=ierr)
     368            4 :             IF (ierr /= 0) EXIT
     369        35788 :             mc = 0.0_dp
     370              : 
     371            4 :             CALL RANDOM_NUMBER(xdum)
     372        35788 :             ma = xdum
     373            4 :             CALL RANDOM_NUMBER(xdum)
     374        35788 :             mb = xdum
     375            4 :             ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
     376            4 :             ntim = MAX(ntim, 1)
     377            4 :             ntim = MIN(ntim, siz*200)
     378            4 :             tstart = m_walltime()
     379         2832 :             DO j = 1, ntim
     380         2828 :                mc(:, :) = MATMUL(ma, mb)
     381         2832 :                ma(1, 1) = REAL(j, KIND=dp)
     382              :             END DO
     383            4 :             tend = m_walltime()
     384            4 :             t = tend - tstart + threshold
     385            4 :             perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     386            4 :             IF (para_env%is_source()) THEN
     387              :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     388            2 :                   " Matrix multiply test: c = a * b         Size = ", len, perf, " Mflop/s"
     389              :             END IF
     390            4 :             tstart = m_walltime()
     391         2832 :             DO j = 1, ntim
     392      3895652 :                mc(:, :) = mc + MATMUL(ma, mb)
     393         2832 :                ma(1, 1) = REAL(j, KIND=dp)
     394              :             END DO
     395            4 :             tend = m_walltime()
     396            4 :             t = tend - tstart + threshold
     397            4 :             IF (t > 0.0_dp) THEN
     398            4 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     399              :             ELSE
     400            0 :                perf = 0.0_dp
     401              :             END IF
     402              : 
     403            4 :             IF (para_env%is_source()) THEN
     404              :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     405            2 :                   " Matrix multiply test: a = a * b         Size = ", len, perf, " Mflop/s"
     406              :             END IF
     407              : 
     408            4 :             tstart = m_walltime()
     409         2832 :             DO j = 1, ntim
     410      3895652 :                mc(:, :) = mc + MATMUL(ma, TRANSPOSE(mb))
     411         2832 :                ma(1, 1) = REAL(j, KIND=dp)
     412              :             END DO
     413            4 :             tend = m_walltime()
     414            4 :             t = tend - tstart + threshold
     415            4 :             IF (t > 0.0_dp) THEN
     416            4 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     417              :             ELSE
     418            0 :                perf = 0.0_dp
     419              :             END IF
     420              : 
     421            4 :             IF (para_env%is_source()) THEN
     422              :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     423            2 :                   " Matrix multiply test: c = a * b(T)      Size = ", len, perf, " Mflop/s"
     424              :             END IF
     425              : 
     426            4 :             tstart = m_walltime()
     427         2832 :             DO j = 1, ntim
     428      3895652 :                mc(:, :) = mc + MATMUL(TRANSPOSE(ma), mb)
     429         2832 :                ma(1, 1) = REAL(j, KIND=dp)
     430              :             END DO
     431            4 :             tend = m_walltime()
     432            4 :             t = tend - tstart + threshold
     433            4 :             IF (t > 0.0_dp) THEN
     434            4 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     435              :             ELSE
     436            0 :                perf = 0.0_dp
     437              :             END IF
     438              : 
     439            4 :             IF (para_env%is_source()) THEN
     440              :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     441            2 :                   " Matrix multiply test: c = a(T) * b      Size = ", len, perf, " Mflop/s"
     442              :             END IF
     443              : 
     444            4 :             DEALLOCATE (ma)
     445            4 :             DEALLOCATE (mb)
     446            4 :             DEALLOCATE (mc)
     447              :          END DO
     448              :       END IF
     449              : 
     450              :       ! test for matrix multpies
     451            2 :       IF (test_dgemm) THEN
     452            0 :          siz = ABS(runtest(5))
     453            0 :          IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of matmul ( BLAS ) "
     454            0 :          DO i = 5, siz, 2
     455            0 :             len = 2**i + 1
     456            0 :             IF (8.0_dp*REAL(len*len, KIND=dp) > max_memory*0.3_dp) EXIT
     457            0 :             ALLOCATE (ma(len, len), STAT=ierr)
     458            0 :             IF (ierr /= 0) EXIT
     459            0 :             ALLOCATE (mb(len, len), STAT=ierr)
     460            0 :             IF (ierr /= 0) EXIT
     461            0 :             ALLOCATE (mc(len, len), STAT=ierr)
     462            0 :             IF (ierr /= 0) EXIT
     463            0 :             mc = 0.0_dp
     464              : 
     465            0 :             CALL RANDOM_NUMBER(xdum)
     466            0 :             ma = xdum
     467            0 :             CALL RANDOM_NUMBER(xdum)
     468            0 :             mb = xdum
     469            0 :             ntim = NINT(1.e8_dp/(2.0_dp*REAL(len, KIND=dp)**3))
     470            0 :             ntim = MAX(ntim, 1)
     471            0 :             ntim = MIN(ntim, 1000)
     472              : 
     473            0 :             tstart = m_walltime()
     474            0 :             DO j = 1, ntim
     475            0 :                CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     476              :             END DO
     477            0 :             tend = m_walltime()
     478            0 :             t = tend - tstart + threshold
     479            0 :             IF (t > 0.0_dp) THEN
     480            0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     481              :             ELSE
     482            0 :                perf = 0.0_dp
     483              :             END IF
     484              : 
     485            0 :             IF (para_env%is_source()) THEN
     486              :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     487            0 :                   " Matrix multiply test: c = a * b         Size = ", len, perf, " Mflop/s"
     488              :             END IF
     489              : 
     490            0 :             tstart = m_walltime()
     491            0 :             DO j = 1, ntim
     492            0 :                CALL dgemm("N", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     493              :             END DO
     494            0 :             tend = m_walltime()
     495            0 :             t = tend - tstart + threshold
     496            0 :             IF (t > 0.0_dp) THEN
     497            0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     498              :             ELSE
     499            0 :                perf = 0.0_dp
     500              :             END IF
     501              : 
     502            0 :             IF (para_env%is_source()) THEN
     503              :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     504            0 :                   " Matrix multiply test: a = a * b         Size = ", len, perf, " Mflop/s"
     505              :             END IF
     506              : 
     507            0 :             tstart = m_walltime()
     508            0 :             DO j = 1, ntim
     509            0 :                CALL dgemm("N", "T", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     510              :             END DO
     511            0 :             tend = m_walltime()
     512            0 :             t = tend - tstart + threshold
     513            0 :             IF (t > 0.0_dp) THEN
     514            0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     515              :             ELSE
     516            0 :                perf = 0.0_dp
     517              :             END IF
     518              : 
     519            0 :             IF (para_env%is_source()) THEN
     520              :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     521            0 :                   " Matrix multiply test: c = a * b(T)      Size = ", len, perf, " Mflop/s"
     522              :             END IF
     523              : 
     524            0 :             tstart = m_walltime()
     525            0 :             DO j = 1, ntim
     526            0 :                CALL dgemm("T", "N", len, len, len, 1.0_dp, ma, len, mb, len, 1.0_dp, mc, len)
     527              :             END DO
     528            0 :             tend = m_walltime()
     529            0 :             t = tend - tstart + threshold
     530            0 :             IF (t > 0.0_dp) THEN
     531            0 :                perf = REAL(ntim, KIND=dp)*2.0_dp*REAL(len, KIND=dp)**3*1.e-6_dp/t
     532              :             ELSE
     533            0 :                perf = 0.0_dp
     534              :             END IF
     535              : 
     536            0 :             IF (para_env%is_source()) THEN
     537              :                WRITE (iw, '(A,i6,T59,F14.4,A)') &
     538            0 :                   " Matrix multiply test: c = a(T) * b      Size = ", len, perf, " Mflop/s"
     539              :             END IF
     540              : 
     541            0 :             DEALLOCATE (ma)
     542            0 :             DEALLOCATE (mb)
     543            0 :             DEALLOCATE (mc)
     544              :          END DO
     545              :       END IF
     546              : 
     547            2 :       CALL para_env%sync()
     548              : 
     549            2 :    END SUBROUTINE matmul_test
     550              : 
     551              : ! **************************************************************************************************
     552              : !> \brief Tests the performance of all available FFT libraries for 3D FFTs
     553              : !> \param para_env ...
     554              : !> \param iw ...
     555              : !> \param fftw_plan_type ...
     556              : !> \param wisdom_file where FFTW3 should look to save/load wisdom
     557              : !> \par History
     558              : !>      none
     559              : !> \author JGH  6-NOV-2000
     560              : ! **************************************************************************************************
     561            2 :    SUBROUTINE fft_test(para_env, iw, fftw_plan_type, wisdom_file)
     562              : 
     563              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     564              :       INTEGER                                            :: iw, fftw_plan_type
     565              :       CHARACTER(LEN=*), INTENT(IN)                       :: wisdom_file
     566              : 
     567              :       INTEGER, PARAMETER                                 :: ndate(3) = [12, 48, 96]
     568              : 
     569              :       INTEGER                                            :: iall, ierr, it, j, len, n(3), ntim, &
     570              :                                                             radix_in, radix_out, siz, stat
     571              :       COMPLEX(KIND=dp), DIMENSION(4, 4, 4)               :: zz
     572            2 :       COMPLEX(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)  :: ca, cb, cc
     573              :       CHARACTER(LEN=7)                                   :: method
     574              :       REAL(KIND=dp)                                      :: flops, perf, scale, t, tdiff, tend, &
     575              :                                                             tstart
     576            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :, :)     :: ra
     577              : 
     578              : ! test for 3d FFT
     579              : 
     580            2 :       IF (para_env%is_source()) WRITE (iw, '(//,A,/)') " Test of 3D-FFT "
     581            2 :       siz = ABS(runtest(3))
     582              : 
     583            8 :       DO iall = 1, 100
     584              :          SELECT CASE (iall)
     585              :          CASE DEFAULT
     586            2 :             EXIT
     587              :          CASE (1)
     588              :             CALL init_fft("FFTSG", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
     589            2 :                           pool_limit=10, plan_style=fftw_plan_type)
     590            2 :             method = "FFTSG  "
     591              :          CASE (2)
     592            2 :             CYCLE
     593              :          CASE (3)
     594              :             CALL init_fft("FFTW3", alltoall=.FALSE., fftsg_sizes=.TRUE., wisdom_file=wisdom_file, &
     595            2 :                           pool_limit=10, plan_style=fftw_plan_type)
     596           10 :             method = "FFTW3  "
     597              :          END SELECT
     598           16 :          n = 4
     599            4 :          zz = 0.0_dp
     600            4 :          CALL fft3d(FWFFT, n, zz, status=stat)
     601            4 :          IF (stat == 0) THEN
     602           16 :             DO it = 1, 3
     603           12 :                radix_in = ndate(it)
     604           12 :                CALL fft_radix_operations(radix_in, radix_out, FFT_RADIX_CLOSEST)
     605           12 :                len = radix_out
     606           48 :                n = len
     607           12 :                IF (16.0_dp*REAL(len*len*len, KIND=dp) > max_memory*0.5_dp) EXIT
     608           60 :                ALLOCATE (ra(len, len, len), STAT=ierr)
     609           60 :                ALLOCATE (ca(len, len, len), STAT=ierr)
     610           12 :                CALL RANDOM_NUMBER(ra)
     611      4035516 :                ca(:, :, :) = ra
     612           12 :                CALL RANDOM_NUMBER(ra)
     613      4035516 :                ca(:, :, :) = ca + gaussi*ra
     614           12 :                flops = REAL(len**3, KIND=dp)*15.0_dp*LOG(REAL(len, KIND=dp))
     615           12 :                ntim = NINT(siz*1.e7_dp/flops)
     616           12 :                ntim = MAX(ntim, 1)
     617           12 :                ntim = MIN(ntim, 200)
     618           12 :                scale = 1.0_dp/REAL(len**3, KIND=dp)
     619           12 :                tstart = m_walltime()
     620          884 :                DO j = 1, ntim
     621          872 :                   CALL fft3d(FWFFT, n, ca)
     622          884 :                   CALL fft3d(BWFFT, n, ca)
     623              :                END DO
     624           12 :                tend = m_walltime()
     625           12 :                t = tend - tstart + threshold
     626           12 :                IF (t > 0.0_dp) THEN
     627           12 :                   perf = REAL(ntim, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
     628              :                ELSE
     629            0 :                   perf = 0.0_dp
     630              :                END IF
     631              : 
     632           12 :                IF (para_env%is_source()) THEN
     633              :                   WRITE (iw, '(T2,A,A,i6,T59,F14.4,A)') &
     634            6 :                      ADJUSTR(method), " test (in-place)    Size = ", len, perf, " Mflop/s"
     635              :                END IF
     636           12 :                DEALLOCATE (ca)
     637           16 :                DEALLOCATE (ra)
     638              :             END DO
     639            4 :             IF (para_env%is_source()) WRITE (iw, *)
     640              :             ! test if input data is preserved
     641            4 :             len = 24
     642           16 :             n = len
     643            4 :             ALLOCATE (ra(len, len, len))
     644            4 :             ALLOCATE (ca(len, len, len))
     645            4 :             ALLOCATE (cb(len, len, len))
     646            4 :             ALLOCATE (cc(len, len, len))
     647            4 :             CALL RANDOM_NUMBER(ra)
     648        57700 :             ca(:, :, :) = ra
     649            4 :             CALL RANDOM_NUMBER(ra)
     650        57700 :             ca(:, :, :) = ca + gaussi*ra
     651        57700 :             cc(:, :, :) = ca
     652            4 :             CALL fft3d(FWFFT, n, ca, cb)
     653        57700 :             tdiff = MAXVAL(ABS(ca - cc))
     654            4 :             IF (tdiff > 1.0E-12_dp) THEN
     655            0 :                IF (para_env%is_source()) &
     656            0 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         FWFFT ", &
     657            0 :                   "             Input array is changed in out-of-place FFT !"
     658              :             ELSE
     659            4 :                IF (para_env%is_source()) &
     660            2 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         FWFFT ", &
     661            4 :                   "         Input array is not changed in out-of-place FFT !"
     662              :             END IF
     663        57700 :             ca(:, :, :) = cc
     664            4 :             CALL fft3d(BWFFT, n, ca, cb)
     665        57700 :             tdiff = MAXVAL(ABS(ca - cc))
     666            4 :             IF (tdiff > 1.0E-12_dp) THEN
     667            0 :                IF (para_env%is_source()) &
     668            0 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         BWFFT ", &
     669            0 :                   "             Input array is changed in out-of-place FFT !"
     670              :             ELSE
     671            4 :                IF (para_env%is_source()) &
     672            2 :                   WRITE (iw, '(T2,A,A,A)') ADJUSTR(method), "         BWFFT ", &
     673            4 :                   "         Input array is not changed in out-of-place FFT !"
     674              :             END IF
     675            4 :             IF (para_env%is_source()) WRITE (iw, *)
     676              : 
     677            4 :             DEALLOCATE (ra)
     678            4 :             DEALLOCATE (ca)
     679            4 :             DEALLOCATE (cb)
     680            4 :             DEALLOCATE (cc)
     681              :          END IF
     682            4 :          CALL finalize_fft(para_env, wisdom_file=wisdom_file)
     683              :       END DO
     684              : 
     685            2 :    END SUBROUTINE fft_test
     686              : 
     687              : ! **************************************************************************************************
     688              : !> \brief   test rs_pw_transfer performance
     689              : !> \param para_env ...
     690              : !> \param iw ...
     691              : !> \param globenv ...
     692              : !> \param rs_pw_transfer_section ...
     693              : !> \author  Joost VandeVondele
     694              : !>      9.2008 Randomise rs grid [Iain Bethune]
     695              : !>      (c) The Numerical Algorithms Group (NAG) Ltd, 2008 on behalf of the HECToR project
     696              : ! **************************************************************************************************
     697            2 :    SUBROUTINE rs_pw_transfer_test(para_env, iw, globenv, rs_pw_transfer_section)
     698              : 
     699              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     700              :       INTEGER                                            :: iw
     701              :       TYPE(global_environment_type), POINTER             :: globenv
     702              :       TYPE(section_vals_type), POINTER                   :: rs_pw_transfer_section
     703              : 
     704              :       CHARACTER(LEN=*), PARAMETER :: routineN = 'rs_pw_transfer_test'
     705              : 
     706              :       INTEGER                                            :: halo_size, handle, i_loop, n_loop, ns_max
     707              :       INTEGER, DIMENSION(3)                              :: no, np
     708            2 :       INTEGER, DIMENSION(:), POINTER                     :: i_vals
     709              :       LOGICAL                                            :: do_rs2pw
     710              :       REAL(KIND=dp)                                      :: tend, tstart
     711              :       TYPE(cell_type), POINTER                           :: box
     712              :       TYPE(pw_grid_type), POINTER                        :: grid
     713              :       TYPE(pw_r3d_rs_type)                               :: ca
     714              :       TYPE(realspace_grid_desc_type), POINTER            :: rs_desc
     715              :       TYPE(realspace_grid_input_type)                    :: input_settings
     716           32 :       TYPE(realspace_grid_type)                          :: rs_grid
     717              :       TYPE(section_vals_type), POINTER                   :: rs_grid_section
     718              : 
     719            2 :       CALL timeset(routineN, handle)
     720              : 
     721              :       !..set fft lib
     722              :       CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
     723              :                     pool_limit=globenv%fft_pool_scratch_limit, &
     724              :                     wisdom_file=globenv%fftw_wisdom_file_name, &
     725            2 :                     plan_style=globenv%fftw_plan_type)
     726              : 
     727              :       ! .. set cell (should otherwise be irrelevant)
     728            2 :       NULLIFY (box)
     729            2 :       CALL cell_create(box)
     730              :       box%hmat = RESHAPE([20.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 20.0_dp, 0.0_dp, &
     731           26 :                           0.0_dp, 0.0_dp, 20.0_dp], [3, 3])
     732            2 :       CALL init_cell(box)
     733              : 
     734              :       ! .. grid type and pw_grid
     735            2 :       NULLIFY (grid)
     736            2 :       CALL section_vals_val_get(rs_pw_transfer_section, "GRID", i_vals=i_vals)
     737            8 :       np = i_vals
     738            2 :       CALL pw_grid_create(grid, para_env, box%hmat, grid_span=FULLSPACE, npts=np, fft_usage=.TRUE., iounit=iw)
     739            8 :       no = grid%npts
     740              : 
     741            2 :       CALL ca%create(grid)
     742            2 :       CALL pw_zero(ca)
     743              : 
     744              :       ! .. rs input setting type
     745            2 :       CALL section_vals_val_get(rs_pw_transfer_section, "HALO_SIZE", i_val=halo_size)
     746            2 :       rs_grid_section => section_vals_get_subs_vals(rs_pw_transfer_section, "RS_GRID")
     747            2 :       ns_max = 2*halo_size + 1
     748            2 :       CALL init_input_type(input_settings, ns_max, rs_grid_section, 1, [-1, -1, -1])
     749              : 
     750              :       ! .. rs type
     751            2 :       NULLIFY (rs_desc)
     752            2 :       CALL rs_grid_create_descriptor(rs_desc, pw_grid=grid, input_settings=input_settings)
     753            2 :       CALL rs_grid_create(rs_grid, rs_desc)
     754            2 :       CALL rs_grid_print(rs_grid, iw)
     755            2 :       CALL rs_grid_zero(rs_grid)
     756              : 
     757              :       ! Put random values on the grid, so summation check will pick up errors
     758            2 :       CALL RANDOM_NUMBER(rs_grid%r)
     759              : 
     760            2 :       CALL section_vals_val_get(rs_pw_transfer_section, "N_loop", i_val=N_loop)
     761            2 :       CALL section_vals_val_get(rs_pw_transfer_section, "RS2PW", l_val=do_rs2pw)
     762              : 
     763              :       ! go for the real loops, sync to get max timings
     764            2 :       IF (para_env%is_source()) THEN
     765            1 :          WRITE (iw, '(T2,A)') ""
     766            1 :          WRITE (iw, '(T2,A)') "Timing rs_pw_transfer routine"
     767            1 :          WRITE (iw, '(T2,A)') ""
     768            1 :          WRITE (iw, '(T2,A)') "iteration      time[s]"
     769              :       END IF
     770            8 :       DO i_loop = 1, N_loop
     771            6 :          CALL para_env%sync()
     772            6 :          tstart = m_walltime()
     773            6 :          IF (do_rs2pw) THEN
     774            6 :             CALL transfer_rs2pw(rs_grid, ca)
     775              :          ELSE
     776            0 :             CALL transfer_pw2rs(rs_grid, ca)
     777              :          END IF
     778            6 :          CALL para_env%sync()
     779            6 :          tend = m_walltime()
     780            8 :          IF (para_env%is_source()) THEN
     781            3 :             WRITE (iw, '(T2,I9,1X,F12.6)') i_loop, tend - tstart
     782              :          END IF
     783              :       END DO
     784              : 
     785              :       !cleanup
     786            2 :       CALL rs_grid_release(rs_grid)
     787            2 :       CALL rs_grid_release_descriptor(rs_desc)
     788            2 :       CALL ca%release()
     789            2 :       CALL pw_grid_release(grid)
     790            2 :       CALL cell_release(box)
     791            2 :       CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
     792              : 
     793            2 :       CALL timestop(handle)
     794              : 
     795           10 :    END SUBROUTINE rs_pw_transfer_test
     796              : 
     797              : ! **************************************************************************************************
     798              : !> \brief Tests the performance of PW calls to FFT routines
     799              : !> \param para_env ...
     800              : !> \param iw ...
     801              : !> \param globenv ...
     802              : !> \param pw_transfer_section ...
     803              : !> \par History
     804              : !>      JGH  6-Feb-2001 : Test and performance code
     805              : !>      Made input sensitive [Joost VandeVondele]
     806              : !> \author JGH  1-JAN-2001
     807              : ! **************************************************************************************************
     808           10 :    SUBROUTINE pw_fft_test(para_env, iw, globenv, pw_transfer_section)
     809              : 
     810              :       TYPE(mp_para_env_type), POINTER                    :: para_env
     811              :       INTEGER                                            :: iw
     812              :       TYPE(global_environment_type), POINTER             :: globenv
     813              :       TYPE(section_vals_type), POINTER                   :: pw_transfer_section
     814              : 
     815              :       REAL(KIND=dp), PARAMETER                           :: toler = 1.e-11_dp
     816              : 
     817              :       INTEGER                                            :: blocked_id, grid_span, i_layout, i_rep, &
     818              :                                                             ig, ip, itmp, n_loop, n_rep, nn, p, q
     819           10 :       INTEGER, ALLOCATABLE, DIMENSION(:, :)              :: layouts
     820              :       INTEGER, DIMENSION(2)                              :: distribution_layout
     821              :       INTEGER, DIMENSION(3)                              :: no, np
     822           10 :       INTEGER, DIMENSION(:), POINTER                     :: i_vals
     823              :       LOGICAL                                            :: debug, is_fullspace, odd, &
     824              :                                                             pw_grid_layout_all, spherical
     825              :       REAL(KIND=dp)                                      :: em, et, flops, gsq, perf, t, t_max, &
     826              :                                                             t_min, tend, tstart
     827           10 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: t_end, t_start
     828              :       TYPE(cell_type), POINTER                           :: box
     829              :       TYPE(pw_c1d_gs_type)                               :: ca, cc
     830              :       TYPE(pw_c3d_rs_type)                               :: cb
     831              :       TYPE(pw_grid_type), POINTER                        :: grid
     832              : 
     833              : !..set fft lib
     834              : 
     835              :       CALL init_fft(globenv%default_fft_library, alltoall=.FALSE., fftsg_sizes=.TRUE., &
     836              :                     pool_limit=globenv%fft_pool_scratch_limit, &
     837              :                     wisdom_file=globenv%fftw_wisdom_file_name, &
     838           10 :                     plan_style=globenv%fftw_plan_type)
     839              : 
     840              :       !..the unit cell (should not really matter, the number of grid points do)
     841           10 :       NULLIFY (box, grid)
     842           10 :       CALL cell_create(box)
     843              :       box%hmat = RESHAPE([10.0_dp, 0.0_dp, 0.0_dp, 0.0_dp, 8.0_dp, 0.0_dp, &
     844          130 :                           0.0_dp, 0.0_dp, 7.0_dp], [3, 3])
     845           10 :       CALL init_cell(box)
     846              : 
     847           10 :       CALL section_vals_get(pw_transfer_section, n_repetition=n_rep)
     848           70 :       DO i_rep = 1, n_rep
     849              : 
     850              :          ! how often should we do the transfer
     851           60 :          CALL section_vals_val_get(pw_transfer_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
     852          180 :          ALLOCATE (t_start(N_loop))
     853          120 :          ALLOCATE (t_end(N_loop))
     854              : 
     855              :          ! setup of the grids
     856           60 :          CALL section_vals_val_get(pw_transfer_section, "GRID", i_rep_section=i_rep, i_vals=i_vals)
     857          240 :          np = i_vals
     858              : 
     859           60 :          CALL section_vals_val_get(pw_transfer_section, "PW_GRID_BLOCKED", i_rep_section=i_rep, i_val=blocked_id)
     860           60 :          CALL section_vals_val_get(pw_transfer_section, "DEBUG", i_rep_section=i_rep, l_val=debug)
     861              : 
     862              :          CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT_ALL", i_rep_section=i_rep, &
     863           60 :                                    l_val=pw_grid_layout_all)
     864              : 
     865              :          ! prepare to loop over all or a specific layout
     866           60 :          IF (pw_grid_layout_all) THEN
     867              :             ! count layouts that fit
     868            8 :             itmp = 0
     869              :             ! start from 2, (/1,para_env%num_pe/) is not supported
     870           16 :             DO p = 2, para_env%num_pe
     871            8 :                q = para_env%num_pe/p
     872           16 :                IF (p*q == para_env%num_pe) THEN
     873            8 :                   itmp = itmp + 1
     874              :                END IF
     875              :             END DO
     876              :             ! build list
     877           24 :             ALLOCATE (layouts(2, itmp))
     878            8 :             itmp = 0
     879           16 :             DO p = 2, para_env%num_pe
     880            8 :                q = para_env%num_pe/p
     881           16 :                IF (p*q == para_env%num_pe) THEN
     882            8 :                   itmp = itmp + 1
     883           24 :                   layouts(:, itmp) = [p, q]
     884              :                END IF
     885              :             END DO
     886              :          ELSE
     887           52 :             CALL section_vals_val_get(pw_transfer_section, "PW_GRID_LAYOUT", i_rep_section=i_rep, i_vals=i_vals)
     888           52 :             ALLOCATE (layouts(2, 1))
     889          156 :             layouts(:, 1) = i_vals
     890              :          END IF
     891              : 
     892          120 :          DO i_layout = 1, SIZE(layouts, 2)
     893              : 
     894          180 :             distribution_layout = layouts(:, i_layout)
     895              : 
     896           60 :             CALL section_vals_val_get(pw_transfer_section, "PW_GRID", i_rep_section=i_rep, i_val=itmp)
     897              : 
     898              :             ! from cp_control_utils
     899           16 :             SELECT CASE (itmp)
     900              :             CASE (do_pwgrid_spherical)
     901           16 :                spherical = .TRUE.
     902           16 :                is_fullspace = .FALSE.
     903              :             CASE (do_pwgrid_ns_fullspace)
     904           28 :                spherical = .FALSE.
     905           28 :                is_fullspace = .TRUE.
     906              :             CASE (do_pwgrid_ns_halfspace)
     907           16 :                spherical = .FALSE.
     908           60 :                is_fullspace = .FALSE.
     909              :             END SELECT
     910              : 
     911              :             ! from pw_env_methods
     912           60 :             IF (spherical) THEN
     913           16 :                grid_span = HALFSPACE
     914           16 :                spherical = .TRUE.
     915           16 :                odd = .TRUE.
     916           44 :             ELSE IF (is_fullspace) THEN
     917           28 :                grid_span = FULLSPACE
     918           28 :                spherical = .FALSE.
     919           28 :                odd = .FALSE.
     920              :             ELSE
     921           16 :                grid_span = HALFSPACE
     922           16 :                spherical = .FALSE.
     923           16 :                odd = .TRUE.
     924              :             END IF
     925              : 
     926              :             ! actual setup
     927              :             CALL pw_grid_create(grid, para_env, box%hmat, grid_span=grid_span, odd=odd, spherical=spherical, &
     928              :                                 blocked=blocked_id, npts=np, fft_usage=.TRUE., &
     929           60 :                                 rs_dims=distribution_layout, iounit=iw)
     930              : 
     931           60 :             IF (iw > 0) CALL m_flush(iw)
     932              : 
     933              :             ! note that the number of grid points might be different from what the user requested (fft-able needed)
     934          240 :             no = grid%npts
     935              : 
     936           60 :             CALL ca%create(grid)
     937           60 :             CALL cb%create(grid)
     938           60 :             CALL cc%create(grid)
     939              : 
     940              :             ! initialize data
     941           60 :             CALL pw_zero(ca)
     942           60 :             CALL pw_zero(cb)
     943           60 :             CALL pw_zero(cc)
     944           60 :             nn = SIZE(ca%array)
     945       142042 :             DO ig = 1, nn
     946       141982 :                gsq = grid%gsq(ig)
     947       142042 :                ca%array(ig) = EXP(-gsq)
     948              :             END DO
     949              : 
     950          420 :             flops = PRODUCT(no)*30.0_dp*LOG(REAL(MAXVAL(no), KIND=dp))
     951           60 :             tstart = m_walltime()
     952          596 :             DO ip = 1, n_loop
     953          536 :                CALL para_env%sync()
     954          536 :                t_start(ip) = m_walltime()
     955          536 :                CALL pw_transfer(ca, cb, debug)
     956          536 :                CALL pw_transfer(cb, cc, debug)
     957          536 :                CALL para_env%sync()
     958          596 :                t_end(ip) = m_walltime()
     959              :             END DO
     960           60 :             tend = m_walltime()
     961           60 :             t = tend - tstart + threshold
     962           60 :             IF (t > 0.0_dp) THEN
     963           60 :                perf = REAL(n_loop, KIND=dp)*2.0_dp*flops*1.e-6_dp/t
     964              :             ELSE
     965            0 :                perf = 0.0_dp
     966              :             END IF
     967              : 
     968       142102 :             em = MAXVAL(ABS(ca%array(:) - cc%array(:)))
     969           60 :             CALL para_env%max(em)
     970       142042 :             et = SUM(ABS(ca%array(:) - cc%array(:)))
     971           60 :             CALL para_env%sum(et)
     972          656 :             t_min = MINVAL(t_end - t_start)
     973          656 :             t_max = MAXVAL(t_end - t_start)
     974              : 
     975           60 :             IF (para_env%is_source()) THEN
     976           30 :                WRITE (iw, *)
     977           30 :                WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Maximal Error ", em
     978           30 :                WRITE (iw, '(A,T67,E14.6)') " Parallel FFT Tests: Total Error ", et
     979              :                WRITE (iw, '(A,T67,F14.0)') &
     980           30 :                   " Parallel FFT Tests: Performance [Mflops] ", perf
     981           30 :                WRITE (iw, '(A,T67,F14.6)') " Best time : ", t_min
     982           30 :                WRITE (iw, '(A,T67,F14.6)') " Worst time: ", t_max
     983           30 :                IF (iw > 0) CALL m_flush(iw)
     984              :             END IF
     985              : 
     986              :             ! need debugging ???
     987           60 :             IF (em > toler .OR. et > toler) THEN
     988            0 :                CPWARN("The FFT results are not accurate ... starting debug pw_transfer")
     989            0 :                CALL pw_transfer(ca, cb, .TRUE.)
     990            0 :                CALL pw_transfer(cb, cc, .TRUE.)
     991              :             END IF
     992              : 
     993              :             ! done with these grids
     994           60 :             CALL ca%release()
     995           60 :             CALL cb%release()
     996           60 :             CALL cc%release()
     997          180 :             CALL pw_grid_release(grid)
     998              : 
     999              :          END DO
    1000              : 
    1001              :          ! local arrays
    1002           60 :          DEALLOCATE (layouts)
    1003           60 :          DEALLOCATE (t_start)
    1004          190 :          DEALLOCATE (t_end)
    1005              : 
    1006              :       END DO
    1007              : 
    1008              :       ! cleanup
    1009           10 :       CALL cell_release(box)
    1010           10 :       CALL finalize_fft(para_env, wisdom_file=globenv%fftw_wisdom_file_name)
    1011              : 
    1012           20 :    END SUBROUTINE pw_fft_test
    1013              : 
    1014              : ! **************************************************************************************************
    1015              : !> \brief Tests the eigensolver library routines
    1016              : !> \param para_env ...
    1017              : !> \param iw ...
    1018              : !> \param eigensolver_section ...
    1019              : !> \par History
    1020              : !>      JGH  6-Feb-2001 : Test and performance code
    1021              : !> \author JGH  1-JAN-2001
    1022              : ! **************************************************************************************************
    1023            2 :    SUBROUTINE eigensolver_test(para_env, iw, eigensolver_section)
    1024              : 
    1025              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1026              :       INTEGER                                            :: iw
    1027              :       TYPE(section_vals_type), POINTER                   :: eigensolver_section
    1028              : 
    1029              :       INTEGER                                            :: diag_method, i, i_loop, i_rep, &
    1030              :                                                             init_method, j, n, n_loop, n_rep, &
    1031              :                                                             neig, unit_number
    1032              :       REAL(KIND=dp)                                      :: t1, t2
    1033            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: eigenvalues
    1034            2 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:, :)        :: buffer
    1035              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1036              :       TYPE(cp_fm_struct_type), POINTER                   :: fmstruct
    1037              :       TYPE(cp_fm_type)                                   :: eigenvectors, matrix, work
    1038            2 :       TYPE(rng_stream_type), ALLOCATABLE                 :: rng_stream
    1039              : 
    1040            2 :       IF (iw > 0) THEN
    1041            1 :          WRITE (UNIT=iw, FMT="(/,/,T2,A,/)") "EIGENSOLVER TEST"
    1042              :       END IF
    1043              : 
    1044              :       ! create blacs env corresponding to para_env
    1045            2 :       NULLIFY (blacs_env)
    1046              :       CALL cp_blacs_env_create(blacs_env=blacs_env, &
    1047            2 :                                para_env=para_env)
    1048              : 
    1049              :       ! loop over all tests
    1050            2 :       CALL section_vals_get(eigensolver_section, n_repetition=n_rep)
    1051           10 :       DO i_rep = 1, n_rep
    1052              : 
    1053              :          ! parse section
    1054            8 :          CALL section_vals_val_get(eigensolver_section, "N", i_rep_section=i_rep, i_val=n)
    1055            8 :          CALL section_vals_val_get(eigensolver_section, "EIGENVALUES", i_rep_section=i_rep, i_val=neig)
    1056            8 :          CALL section_vals_val_get(eigensolver_section, "DIAG_METHOD", i_rep_section=i_rep, i_val=diag_method)
    1057            8 :          CALL section_vals_val_get(eigensolver_section, "INIT_METHOD", i_rep_section=i_rep, i_val=init_method)
    1058            8 :          CALL section_vals_val_get(eigensolver_section, "N_loop", i_rep_section=i_rep, i_val=n_loop)
    1059              : 
    1060              :          ! proper number of eigs
    1061            8 :          IF (neig < 0) neig = n
    1062            8 :          neig = MIN(neig, n)
    1063              : 
    1064              :          ! report
    1065            8 :          IF (iw > 0) THEN
    1066            4 :             WRITE (iw, *) "Matrix size", n
    1067            4 :             WRITE (iw, *) "Number of eigenvalues", neig
    1068            4 :             WRITE (iw, *) "Timing loops", n_loop
    1069            2 :             SELECT CASE (diag_method)
    1070              :             CASE (do_diag_syevd)
    1071            2 :                WRITE (iw, *) "Diag using syevd"
    1072              :             CASE (do_diag_syevx)
    1073            4 :                WRITE (iw, *) "Diag using syevx"
    1074              :             CASE DEFAULT
    1075              :                ! stop
    1076              :             END SELECT
    1077              : 
    1078            4 :             SELECT CASE (init_method)
    1079              :             CASE (do_mat_random)
    1080            4 :                WRITE (iw, *) "using random matrix"
    1081              :             CASE (do_mat_read)
    1082            4 :                WRITE (iw, *) "reading from file"
    1083              :             CASE DEFAULT
    1084              :                ! stop
    1085              :             END SELECT
    1086              :          END IF
    1087              : 
    1088              :          ! create matrix struct type
    1089            8 :          NULLIFY (fmstruct)
    1090              :          CALL cp_fm_struct_create(fmstruct=fmstruct, &
    1091              :                                   para_env=para_env, &
    1092              :                                   context=blacs_env, &
    1093              :                                   nrow_global=n, &
    1094            8 :                                   ncol_global=n)
    1095              : 
    1096              :          ! create all needed matrices, and buffers for the eigenvalues
    1097              :          CALL cp_fm_create(matrix=matrix, &
    1098              :                            matrix_struct=fmstruct, &
    1099            8 :                            name="MATRIX")
    1100            8 :          CALL cp_fm_set_all(matrix, 0.0_dp)
    1101              : 
    1102              :          CALL cp_fm_create(matrix=eigenvectors, &
    1103              :                            matrix_struct=fmstruct, &
    1104            8 :                            name="EIGENVECTORS")
    1105            8 :          CALL cp_fm_set_all(eigenvectors, 0.0_dp)
    1106              : 
    1107              :          CALL cp_fm_create(matrix=work, &
    1108              :                            matrix_struct=fmstruct, &
    1109            8 :                            name="WORK")
    1110            8 :          CALL cp_fm_set_all(matrix, 0.0_dp)
    1111              : 
    1112           24 :          ALLOCATE (eigenvalues(n))
    1113          100 :          eigenvalues = 0.0_dp
    1114           16 :          ALLOCATE (buffer(1, n))
    1115              : 
    1116              :          ! generate initial matrix, either by reading a file, or using random numbers
    1117            8 :          IF (para_env%is_source()) THEN
    1118            4 :             SELECT CASE (init_method)
    1119              :             CASE (do_mat_random)
    1120              :                rng_stream = rng_stream_type( &
    1121              :                             name="rng_stream", &
    1122              :                             distribution_type=UNIFORM, &
    1123            4 :                             extended_precision=.TRUE.)
    1124              :             CASE (do_mat_read)
    1125              :                CALL open_file(file_name="MATRIX", &
    1126              :                               file_action="READ", &
    1127              :                               file_form="FORMATTED", &
    1128              :                               file_status="OLD", &
    1129            4 :                               unit_number=unit_number)
    1130              :             END SELECT
    1131              :          END IF
    1132              : 
    1133          100 :          DO i = 1, n
    1134           92 :             IF (para_env%is_source()) THEN
    1135              :                SELECT CASE (init_method)
    1136              :                CASE (do_mat_random)
    1137          347 :                   DO j = i, n
    1138          347 :                      buffer(1, j) = rng_stream%next() - 0.5_dp
    1139              :                   END DO
    1140              :                   !MK activate/modify for a diagonal dominant symmetric matrix:
    1141              :                   !MK buffer(1,i) = 10.0_dp*buffer(1,i)
    1142              :                CASE (do_mat_read)
    1143           46 :                   READ (UNIT=unit_number, FMT=*) buffer(1, 1:n)
    1144              :                END SELECT
    1145              :             END IF
    1146           92 :             CALL para_env%bcast(buffer)
    1147            8 :             SELECT CASE (init_method)
    1148              :             CASE (do_mat_random)
    1149              :                CALL cp_fm_set_submatrix(fm=matrix, &
    1150              :                                         new_values=buffer, &
    1151              :                                         start_row=i, &
    1152              :                                         start_col=i, &
    1153              :                                         n_rows=1, &
    1154              :                                         n_cols=n - i + 1, &
    1155              :                                         alpha=1.0_dp, &
    1156              :                                         beta=0.0_dp, &
    1157           92 :                                         transpose=.FALSE.)
    1158              :                CALL cp_fm_set_submatrix(fm=matrix, &
    1159              :                                         new_values=buffer, &
    1160              :                                         start_row=i, &
    1161              :                                         start_col=i, &
    1162              :                                         n_rows=n - i + 1, &
    1163              :                                         n_cols=1, &
    1164              :                                         alpha=1.0_dp, &
    1165              :                                         beta=0.0_dp, &
    1166           92 :                                         transpose=.TRUE.)
    1167              :             CASE (do_mat_read)
    1168              :                CALL cp_fm_set_submatrix(fm=matrix, &
    1169              :                                         new_values=buffer, &
    1170              :                                         start_row=i, &
    1171              :                                         start_col=1, &
    1172              :                                         n_rows=1, &
    1173              :                                         n_cols=n, &
    1174              :                                         alpha=1.0_dp, &
    1175              :                                         beta=0.0_dp, &
    1176           92 :                                         transpose=.FALSE.)
    1177              :             END SELECT
    1178              :          END DO
    1179              : 
    1180            8 :          DEALLOCATE (buffer)
    1181              : 
    1182            8 :          IF (para_env%is_source()) THEN
    1183            0 :             SELECT CASE (init_method)
    1184              :             CASE (do_mat_read)
    1185            4 :                CALL close_file(unit_number=unit_number)
    1186              :             END SELECT
    1187              :          END IF
    1188              : 
    1189           88 :          DO i_loop = 1, n_loop
    1190         1000 :             eigenvalues = 0.0_dp
    1191           80 :             CALL cp_fm_set_all(eigenvectors, 0.0_dp)
    1192              :             CALL cp_fm_to_fm(source=matrix, &
    1193           80 :                              destination=work)
    1194              : 
    1195              :             ! DONE, now testing
    1196           80 :             t1 = m_walltime()
    1197           40 :             SELECT CASE (diag_method)
    1198              :             CASE (do_diag_syevd)
    1199              :                CALL cp_fm_syevd(matrix=work, &
    1200              :                                 eigenvectors=eigenvectors, &
    1201           40 :                                 eigenvalues=eigenvalues)
    1202              :             CASE (do_diag_syevx)
    1203              :                CALL cp_fm_syevx(matrix=work, &
    1204              :                                 eigenvectors=eigenvectors, &
    1205              :                                 eigenvalues=eigenvalues, &
    1206              :                                 neig=neig, &
    1207           80 :                                 work_syevx=1.0_dp)
    1208              :             END SELECT
    1209           80 :             t2 = m_walltime()
    1210           88 :             IF (iw > 0) WRITE (iw, *) "Timing for loop ", i_loop, " : ", t2 - t1
    1211              :          END DO
    1212              : 
    1213            8 :          IF (iw > 0) THEN
    1214            4 :             WRITE (iw, *) "Eigenvalues: "
    1215            4 :             WRITE (UNIT=iw, FMT="(T3,5F14.6)") eigenvalues(1:neig)
    1216           42 :             WRITE (UNIT=iw, FMT="(T3,A4,F16.6)") "Sum:", SUM(eigenvalues(1:neig))
    1217            4 :             WRITE (iw, *) ""
    1218              :          END IF
    1219              : 
    1220              :          ! Clean up
    1221            8 :          DEALLOCATE (eigenvalues)
    1222            8 :          CALL cp_fm_release(matrix=work)
    1223            8 :          CALL cp_fm_release(matrix=eigenvectors)
    1224            8 :          CALL cp_fm_release(matrix=matrix)
    1225           42 :          CALL cp_fm_struct_release(fmstruct=fmstruct)
    1226              : 
    1227              :       END DO
    1228              : 
    1229            2 :       CALL cp_blacs_env_release(blacs_env=blacs_env)
    1230              : 
    1231            4 :    END SUBROUTINE eigensolver_test
    1232              : 
    1233              : ! **************************************************************************************************
    1234              : !> \brief Tests the parallel matrix multiply
    1235              : !> \param para_env ...
    1236              : !> \param iw ...
    1237              : !> \param cp_fm_gemm_test_section ...
    1238              : ! **************************************************************************************************
    1239            4 :    SUBROUTINE cp_fm_gemm_test(para_env, iw, cp_fm_gemm_test_section)
    1240              : 
    1241              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1242              :       INTEGER                                            :: iw
    1243              :       TYPE(section_vals_type), POINTER                   :: cp_fm_gemm_test_section
    1244              : 
    1245              :       CHARACTER(LEN=1)                                   :: transa, transb
    1246              :       INTEGER :: i_loop, i_rep, k, m, n, N_loop, n_rep, ncol_block, ncol_block_actual, &
    1247              :          ncol_global, np, nrow_block, nrow_block_actual, nrow_global
    1248            4 :       INTEGER, DIMENSION(:), POINTER                     :: grid_2d
    1249              :       LOGICAL                                            :: force_blocksize, row_major, transa_p, &
    1250              :                                                             transb_p
    1251              :       REAL(KIND=dp)                                      :: t1, t2, t3, t4
    1252              :       TYPE(cp_blacs_env_type), POINTER                   :: blacs_env
    1253              :       TYPE(cp_fm_struct_type), POINTER                   :: fmstruct_a, fmstruct_b, fmstruct_c
    1254              :       TYPE(cp_fm_type)                                   :: matrix_a, matrix_b, matrix_c
    1255              : 
    1256            4 :       CALL section_vals_get(cp_fm_gemm_test_section, n_repetition=n_rep)
    1257           24 :       DO i_rep = 1, n_rep
    1258              : 
    1259              :          ! how often should we do the multiply
    1260           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
    1261              : 
    1262              :          ! matrices def.
    1263           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "K", i_rep_section=i_rep, i_val=k)
    1264           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "N", i_rep_section=i_rep, i_val=n)
    1265           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "M", i_rep_section=i_rep, i_val=m)
    1266           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "transa", i_rep_section=i_rep, l_val=transa_p)
    1267           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "transb", i_rep_section=i_rep, l_val=transb_p)
    1268           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "nrow_block", i_rep_section=i_rep, i_val=nrow_block)
    1269           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "ncol_block", i_rep_section=i_rep, i_val=ncol_block)
    1270           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "ROW_MAJOR", i_rep_section=i_rep, l_val=row_major)
    1271           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "GRID_2D", i_rep_section=i_rep, i_vals=grid_2d)
    1272           20 :          CALL section_vals_val_get(cp_fm_gemm_test_section, "FORCE_BLOCKSIZE", i_rep_section=i_rep, l_val=force_blocksize)
    1273           20 :          transa = "N"
    1274           20 :          transb = "N"
    1275           20 :          IF (transa_p) transa = "T"
    1276           20 :          IF (transb_p) transb = "T"
    1277              : 
    1278           20 :          IF (iw > 0) THEN
    1279           10 :             WRITE (iw, '(T2,A)') "----------- TESTING PARALLEL MATRIX MULTIPLY -------------"
    1280           10 :             WRITE (iw, '(T2,A)', ADVANCE="NO") "C = "
    1281           10 :             IF (transa_p) THEN
    1282            2 :                WRITE (iw, '(A)', ADVANCE="NO") "TRANSPOSE(A) x"
    1283              :             ELSE
    1284            8 :                WRITE (iw, '(A)', ADVANCE="NO") "A x "
    1285              :             END IF
    1286           10 :             IF (transb_p) THEN
    1287            2 :                WRITE (iw, '(A)') "TRANSPOSE(B) "
    1288              :             ELSE
    1289            8 :                WRITE (iw, '(A)') "B "
    1290              :             END IF
    1291           10 :             WRITE (iw, '(T2,A,T50,I5,A,I5)') 'requested block size', nrow_block, ' by ', ncol_block
    1292           10 :             WRITE (iw, '(T2,A,T50,I5)') 'number of repetitions of cp_fm_gemm ', n_loop
    1293           10 :             WRITE (iw, '(T2,A,T50,L5)') 'Row Major', row_major
    1294           30 :             WRITE (iw, '(T2,A,T50,2I7)') 'GRID_2D ', grid_2d
    1295           10 :             WRITE (iw, '(T2,A,T50,L5)') 'Force blocksize ', force_blocksize
    1296              :             ! check the return value of pilaenv, too small values limit the performance (assuming pdgemm is the vanilla variant)
    1297           10 :             np = cp_fm_pilaenv(0, 'D')
    1298           10 :             IF (np > 0) THEN
    1299           10 :                WRITE (iw, '(T2,A,T50,I5)') 'PILAENV blocksize', np
    1300              :             END IF
    1301              :          END IF
    1302              : 
    1303           20 :          NULLIFY (blacs_env)
    1304              :          CALL cp_blacs_env_create(blacs_env=blacs_env, &
    1305              :                                   para_env=para_env, &
    1306              :                                   row_major=row_major, &
    1307           20 :                                   grid_2d=grid_2d)
    1308              : 
    1309           20 :          NULLIFY (fmstruct_a)
    1310           20 :          IF (transa_p) THEN
    1311            4 :             nrow_global = m; ncol_global = k
    1312              :          ELSE
    1313           16 :             nrow_global = k; ncol_global = m
    1314              :          END IF
    1315              :          CALL cp_fm_struct_create(fmstruct=fmstruct_a, para_env=para_env, context=blacs_env, &
    1316              :                                   nrow_global=nrow_global, ncol_global=ncol_global, &
    1317           20 :                                   nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
    1318           20 :          CALL cp_fm_struct_get(fmstruct_a, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
    1319           30 :          IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix A ', nrow_global, " by ", ncol_global, &
    1320           20 :             ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
    1321              : 
    1322           20 :          IF (transb_p) THEN
    1323            4 :             nrow_global = n; ncol_global = m
    1324              :          ELSE
    1325           16 :             nrow_global = m; ncol_global = n
    1326              :          END IF
    1327           20 :          NULLIFY (fmstruct_b)
    1328              :          CALL cp_fm_struct_create(fmstruct=fmstruct_b, para_env=para_env, context=blacs_env, &
    1329              :                                   nrow_global=nrow_global, ncol_global=ncol_global, &
    1330           20 :                                   nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
    1331           20 :          CALL cp_fm_struct_get(fmstruct_b, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
    1332           30 :          IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix B ', nrow_global, " by ", ncol_global, &
    1333           20 :             ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
    1334              : 
    1335           20 :          NULLIFY (fmstruct_c)
    1336           20 :          nrow_global = k
    1337           20 :          ncol_global = n
    1338              :          CALL cp_fm_struct_create(fmstruct=fmstruct_c, para_env=para_env, context=blacs_env, &
    1339              :                                   nrow_global=nrow_global, ncol_global=ncol_global, &
    1340           20 :                                   nrow_block=nrow_block, ncol_block=ncol_block, force_block=force_blocksize)
    1341           20 :          CALL cp_fm_struct_get(fmstruct_c, nrow_block=nrow_block_actual, ncol_block=ncol_block_actual)
    1342           30 :          IF (iw > 0) WRITE (iw, '(T2,A,I9,A,I9,A,I5,A,I5)') 'matrix C ', nrow_global, " by ", ncol_global, &
    1343           20 :             ' using blocks of ', nrow_block_actual, ' by ', ncol_block_actual
    1344              : 
    1345           20 :          CALL cp_fm_create(matrix=matrix_a, matrix_struct=fmstruct_a, name="MATRIX A")
    1346           20 :          CALL cp_fm_create(matrix=matrix_b, matrix_struct=fmstruct_b, name="MATRIX B")
    1347           20 :          CALL cp_fm_create(matrix=matrix_c, matrix_struct=fmstruct_c, name="MATRIX C")
    1348              : 
    1349           20 :          CALL RANDOM_NUMBER(matrix_a%local_data)
    1350           20 :          CALL RANDOM_NUMBER(matrix_b%local_data)
    1351           20 :          CALL RANDOM_NUMBER(matrix_c%local_data)
    1352              : 
    1353           20 :          IF (iw > 0) CALL m_flush(iw)
    1354              : 
    1355           20 :          t1 = m_walltime()
    1356          932 :          DO i_loop = 1, N_loop
    1357          912 :             t3 = m_walltime()
    1358          912 :             CALL parallel_gemm(transa, transb, k, n, m, 1.0_dp, matrix_a, matrix_b, 0.0_dp, matrix_c)
    1359          912 :             t4 = m_walltime()
    1360          932 :             IF (iw > 0) THEN
    1361          456 :                WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm timing: ", (t4 - t3)
    1362          456 :                CALL m_flush(iw)
    1363              :             END IF
    1364              :          END DO
    1365           20 :          t2 = m_walltime()
    1366              : 
    1367           20 :          IF (iw > 0) THEN
    1368           10 :             WRITE (iw, '(T2,A,T50,F12.6)') "average cp_fm_gemm timing: ", (t2 - t1)/N_loop
    1369           10 :             IF (t2 > t1) THEN
    1370           10 :                WRITE (iw, '(T2,A,T50,F12.6)') "cp_fm_gemm Gflops per MPI task: ", &
    1371           20 :                   2*REAL(m, kind=dp)*REAL(n, kind=dp)*REAL(k, kind=dp)*N_loop/MAX(0.001_dp, t2 - t1)/1.0E9_dp/para_env%num_pe
    1372              :             END IF
    1373              :          END IF
    1374              : 
    1375           20 :          CALL cp_fm_release(matrix=matrix_a)
    1376           20 :          CALL cp_fm_release(matrix=matrix_b)
    1377           20 :          CALL cp_fm_release(matrix=matrix_c)
    1378           20 :          CALL cp_fm_struct_release(fmstruct=fmstruct_a)
    1379           20 :          CALL cp_fm_struct_release(fmstruct=fmstruct_b)
    1380           20 :          CALL cp_fm_struct_release(fmstruct=fmstruct_c)
    1381          144 :          CALL cp_blacs_env_release(blacs_env=blacs_env)
    1382              : 
    1383              :       END DO
    1384              : 
    1385            4 :    END SUBROUTINE cp_fm_gemm_test
    1386              : 
    1387              : ! **************************************************************************************************
    1388              : !> \brief Tests the DBCSR interface.
    1389              : !> \param para_env ...
    1390              : !> \param iw ...
    1391              : !> \param input_section ...
    1392              : ! **************************************************************************************************
    1393           32 :    SUBROUTINE cp_dbcsr_tests(para_env, iw, input_section)
    1394              : 
    1395              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1396              :       INTEGER                                            :: iw
    1397              :       TYPE(section_vals_type), POINTER                   :: input_section
    1398              : 
    1399              :       CHARACTER, DIMENSION(3)                            :: types
    1400              :       INTEGER                                            :: data_type, i_rep, k, m, n, N_loop, &
    1401              :                                                             n_rep, test_type
    1402           32 :       INTEGER, DIMENSION(:), POINTER                     :: bs_k, bs_m, bs_n, nproc
    1403              :       LOGICAL                                            :: always_checksum, retain_sparsity, &
    1404              :                                                             transa_p, transb_p
    1405              :       REAL(KIND=dp)                                      :: alpha, beta, filter_eps, s_a, s_b, s_c
    1406              : 
    1407              : !   ---------------------------------------------------------------------------
    1408              : 
    1409           32 :       NULLIFY (bs_m, bs_n, bs_k)
    1410           32 :       CALL section_vals_get(input_section, n_repetition=n_rep)
    1411           32 :       CALL dbcsr_reset_randmat_seed()
    1412           78 :       DO i_rep = 1, n_rep
    1413              :          ! how often should we do the multiply
    1414           46 :          CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
    1415              : 
    1416              :          ! matrices def.
    1417           46 :          CALL section_vals_val_get(input_section, "TEST_TYPE", i_rep_section=i_rep, i_val=test_type)
    1418           46 :          CALL section_vals_val_get(input_section, "DATA_TYPE", i_rep_section=i_rep, i_val=data_type)
    1419           46 :          CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
    1420           46 :          CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
    1421           46 :          CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
    1422           46 :          CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
    1423           46 :          CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
    1424              :          CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, &
    1425           46 :                                    i_vals=bs_m)
    1426              :          CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, &
    1427           46 :                                    i_vals=bs_n)
    1428              :          CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, &
    1429           46 :                                    i_vals=bs_k)
    1430           46 :          CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
    1431           46 :          CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
    1432           46 :          CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
    1433           46 :          CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
    1434           46 :          CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
    1435           46 :          CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
    1436              :          CALL section_vals_val_get(input_section, "nproc", i_rep_section=i_rep, &
    1437           46 :                                    i_vals=nproc)
    1438              :          CALL section_vals_val_get(input_section, "atype", i_rep_section=i_rep, &
    1439           46 :                                    c_val=types(1))
    1440              :          CALL section_vals_val_get(input_section, "btype", i_rep_section=i_rep, &
    1441           46 :                                    c_val=types(2))
    1442              :          CALL section_vals_val_get(input_section, "ctype", i_rep_section=i_rep, &
    1443           46 :                                    c_val=types(3))
    1444              :          CALL section_vals_val_get(input_section, "filter_eps", &
    1445           46 :                                    i_rep_section=i_rep, r_val=filter_eps)
    1446           46 :          CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
    1447              : 
    1448              :          CALL dbcsr_run_tests(para_env%get_handle(), iw, nproc, &
    1449              :                               [m, n, k], &
    1450              :                               [transa_p, transb_p], &
    1451              :                               bs_m, bs_n, bs_k, &
    1452              :                               [s_a, s_b, s_c], &
    1453              :                               alpha, beta, &
    1454              :                               data_type=data_type, &
    1455              :                               test_type=test_type, &
    1456              :                               n_loops=n_loop, eps=filter_eps, retain_sparsity=retain_sparsity, &
    1457          538 :                               always_checksum=always_checksum)
    1458              :       END DO
    1459           32 :    END SUBROUTINE cp_dbcsr_tests
    1460              : 
    1461              : ! **************************************************************************************************
    1462              : !> \brief Tests the DBM library.
    1463              : !> \param para_env ...
    1464              : !> \param iw ...
    1465              : !> \param input_section ...
    1466              : ! **************************************************************************************************
    1467           14 :    SUBROUTINE run_dbm_tests(para_env, iw, input_section)
    1468              : 
    1469              :       TYPE(mp_para_env_type), POINTER                    :: para_env
    1470              :       INTEGER                                            :: iw
    1471              :       TYPE(section_vals_type), POINTER                   :: input_section
    1472              : 
    1473              :       INTEGER                                            :: i_rep, k, m, n, N_loop, n_rep
    1474           14 :       INTEGER, DIMENSION(:), POINTER                     :: bs_k, bs_m, bs_n
    1475              :       LOGICAL                                            :: always_checksum, retain_sparsity, &
    1476              :                                                             transa_p, transb_p
    1477              :       REAL(KIND=dp)                                      :: alpha, beta, filter_eps, s_a, s_b, s_c
    1478              : 
    1479              : !   ---------------------------------------------------------------------------
    1480              : 
    1481           14 :       NULLIFY (bs_m, bs_n, bs_k)
    1482           14 :       CALL section_vals_get(input_section, n_repetition=n_rep)
    1483           14 :       CALL dbcsr_reset_randmat_seed()
    1484           28 :       DO i_rep = 1, n_rep
    1485           14 :          CALL section_vals_val_get(input_section, "N_loop", i_rep_section=i_rep, i_val=N_loop)
    1486           14 :          CALL section_vals_val_get(input_section, "K", i_rep_section=i_rep, i_val=k)
    1487           14 :          CALL section_vals_val_get(input_section, "N", i_rep_section=i_rep, i_val=n)
    1488           14 :          CALL section_vals_val_get(input_section, "M", i_rep_section=i_rep, i_val=m)
    1489           14 :          CALL section_vals_val_get(input_section, "transa", i_rep_section=i_rep, l_val=transa_p)
    1490           14 :          CALL section_vals_val_get(input_section, "transb", i_rep_section=i_rep, l_val=transb_p)
    1491           14 :          CALL section_vals_val_get(input_section, "bs_m", i_rep_section=i_rep, i_vals=bs_m)
    1492           14 :          CALL section_vals_val_get(input_section, "bs_n", i_rep_section=i_rep, i_vals=bs_n)
    1493           14 :          CALL section_vals_val_get(input_section, "bs_k", i_rep_section=i_rep, i_vals=bs_k)
    1494           14 :          CALL section_vals_val_get(input_section, "keepsparse", i_rep_section=i_rep, l_val=retain_sparsity)
    1495           14 :          CALL section_vals_val_get(input_section, "asparsity", i_rep_section=i_rep, r_val=s_a)
    1496           14 :          CALL section_vals_val_get(input_section, "bsparsity", i_rep_section=i_rep, r_val=s_b)
    1497           14 :          CALL section_vals_val_get(input_section, "csparsity", i_rep_section=i_rep, r_val=s_c)
    1498           14 :          CALL section_vals_val_get(input_section, "alpha", i_rep_section=i_rep, r_val=alpha)
    1499           14 :          CALL section_vals_val_get(input_section, "beta", i_rep_section=i_rep, r_val=beta)
    1500           14 :          CALL section_vals_val_get(input_section, "filter_eps", i_rep_section=i_rep, r_val=filter_eps)
    1501           14 :          CALL section_vals_val_get(input_section, "ALWAYS_CHECKSUM", i_rep_section=i_rep, l_val=always_checksum)
    1502              : 
    1503              :          CALL dbm_run_tests(mp_group=para_env, &
    1504              :                             io_unit=iw, &
    1505              :                             matrix_sizes=[m, n, k], &
    1506              :                             trs=[transa_p, transb_p], &
    1507              :                             bs_m=bs_m, &
    1508              :                             bs_n=bs_n, &
    1509              :                             bs_k=bs_k, &
    1510              :                             sparsities=[s_a, s_b, s_c], &
    1511              :                             alpha=alpha, &
    1512              :                             beta=beta, &
    1513              :                             n_loops=n_loop, &
    1514              :                             eps=filter_eps, &
    1515              :                             retain_sparsity=retain_sparsity, &
    1516          154 :                             always_checksum=always_checksum)
    1517              :       END DO
    1518           14 :    END SUBROUTINE run_dbm_tests
    1519              : 
    1520         8484 : END MODULE library_tests
        

Generated by: LCOV version 2.0-1