!--------------------------------------------------------------------------------------------------!
!   CP2K: A general program to perform molecular dynamics simulations                              !
!   Copyright 2000-2024 CP2K developers group <https://cp2k.org>                                   !
!                                                                                                  !
!   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
!--------------------------------------------------------------------------------------------------!

! **************************************************************************************************
!> \brief Interface to the message passing library MPI
!> \par History
!>      JGH (02-Jan-2001): New error handling
!>                         Performance tools
!>      JGH (14-Jan-2001): New routines mp_comm_compare, mp_cart_coords,
!>                                      mp_rank_compare, mp_alltoall
!>      JGH (06-Feb-2001): New routines mp_comm_free
!>      JGH (22-Mar-2001): New routines mp_comm_dup
!>      fawzi (04-NOV-2004): storable performance info (for f77 interface)
!>      Wrapper routine for mpi_gatherv added (22.12.2005,MK)
!>      JGH (13-Feb-2006): Flexible precision
!>      JGH (15-Feb-2006): single precision mp_alltoall
!> \author JGH
! **************************************************************************************************
MODULE mp_perf_test
   USE kinds, ONLY: dp
   USE message_passing, ONLY: mp_comm_type
   ! some benchmarking code
#include "../base/base_uses.f90"

#if defined(__parallel)
#if defined(__MPI_F08)
   USE mpi_f08, ONLY: mpi_wtime
#else
   USE mpi, ONLY: mpi_wtime
#endif
#endif

   PRIVATE

   PUBLIC :: mpi_perf_test

CONTAINS

! **************************************************************************************************
!> \brief Tests the MPI library
!> \param comm the relevant, initialized communicator
!> \param npow number of sizes to test, 10**1 .. 10**npow
!> \param output_unit where to direct output
!> \par History
!>      JGH  6-Feb-2001 : Test and performance code
!> \author JGH  1-JAN-2001
!> \note
!>      quickly adapted benchmark code; works only with an even number of MPI tasks.
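!> \par Example
!>      A minimal call sketch (the values are illustrative only: npow = 4 tests
!>      message sizes 10**1 .. 10**4, unit 6 is assumed to be stdout):
!>        CALL mpi_perf_test(comm, npow=4, output_unit=6)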
! **************************************************************************************************
   SUBROUTINE mpi_perf_test(comm, npow, output_unit)
      CLASS(mp_comm_type), INTENT(IN) :: comm
      INTEGER, INTENT(IN)                      :: npow, output_unit

#if defined(__parallel)

      INTEGER :: I, itask, itests, J, jtask, left, nbufmax, &
                 ncount, Ngrid, Nloc, nprocs, Ntot, partner, right, taskid, tag, source
      INTEGER, ALLOCATABLE, DIMENSION(:)       :: rcount, rdispl, scount, sdispl
      LOGICAL                                  :: ionode
      REAL(KIND=dp)                            :: maxdiff, t1, &
                                                  t2, t3, t4, t5
      REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: buffer1, buffer2, buffer3, &
                                                  lgrid, lgrid2, lgrid3
      REAL(KIND=dp), ALLOCATABLE, &
         DIMENSION(:, :)                        :: grid, grid2, grid3, &
                                                   send_timings, send_timings2
      REAL(KIND=dp), PARAMETER :: threshold = 1.0E-8_dp

      ! set system sizes !
      ngrid = 10**npow

      taskid = comm%mepos
      nprocs = comm%num_pe
      ionode = comm%is_source()
      IF (ionode .AND. output_unit > 0) THEN
         WRITE (output_unit, *) "Running with ", nprocs, " tasks"
         WRITE (output_unit, *) "running messages with npow = ", npow
         WRITE (output_unit, *) "use MPI X in the input for larger (e.g. 6) or smaller (e.g. 3) messages"
         IF (MODULO(nprocs, 2) .NE. 0) WRITE (output_unit, *) "Skipping test: it requires an even number of tasks"
      END IF

      IF (MODULO(nprocs, 2) .NE. 0) RETURN

      ! equal loads
      Nloc = Ngrid/nprocs
      Ntot = Nprocs*Nloc
      nbufmax = 10**npow
      !
      ALLOCATE (rcount(nprocs))
      ALLOCATE (scount(nprocs))
      ALLOCATE (sdispl(nprocs))
      ALLOCATE (rdispl(nprocs))
      ALLOCATE (buffer1(nbufmax))
      ALLOCATE (buffer2(nbufmax))
      ALLOCATE (buffer3(nbufmax))
      ALLOCATE (grid(Nloc, Nprocs))
      ALLOCATE (grid2(Nloc, Nprocs))
      ALLOCATE (grid3(Nloc, Nprocs))
      ALLOCATE (lgrid(Nloc))
      ALLOCATE (lgrid2(Nloc))
      ALLOCATE (lgrid3(Nloc))
      ALLOCATE (send_timings(0:nprocs - 1, 0:nprocs - 1))
      ALLOCATE (send_timings2(0:nprocs - 1, 0:nprocs - 1))
      buffer1 = 0.0_dp
      buffer2 = 0.0_dp
      buffer3 = 0.0_dp
      ! timings
      send_timings = 0.0_dp
      send_timings2 = 0.0_dp
      ! -------------------------------------------------------------------------------------------
      ! ------------------------------ some in memory tests                   ---------------------
      ! -------------------------------------------------------------------------------------------
      CALL comm%sync()
      IF (ionode .AND. output_unit > 0) THEN
         WRITE (output_unit, *) "Testing in memory copies just 1 CPU "
         WRITE (output_unit, *) "  could tell something about the motherboard / cache / compiler "
      END IF
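      ! each size 10**i is repeated 3**(npow - i) times, so every size moves a
      ! comparable total volume; the reported rate is repetitions*ncount*8 bytes
      ! divided by the accumulated time t2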
      DO i = 1, npow
         ncount = 10**i
         t2 = 0.0E0_dp
         IF (ncount .GT. nbufmax) CPABORT("")
         DO j = 1, 3**(npow - i)
            CALL comm%sync()
            t1 = MPI_WTIME()
            buffer2(1:ncount) = buffer1(1:ncount)
            t2 = t2 + MPI_WTIME() - t1 + threshold
         END DO
         CALL comm%max(t2, 0)
         IF (ionode .AND. output_unit > 0) THEN
            WRITE (output_unit, '(I9,A,F12.4,A)') 8*ncount, " Bytes ", (3**(npow - i))*ncount*8.0E-6_dp/t2, " MB/s"
         END IF
      END DO
      ! -------------------------------------------------------------------------------------------
      ! ------------------------------ some in memory tests                   ---------------------
      ! -------------------------------------------------------------------------------------------
      CALL comm%sync()
      IF (ionode .AND. output_unit > 0) THEN
         WRITE (output_unit, *) "Testing in memory copies on all CPUs"
         WRITE (output_unit, *) "  is the memory bandwidth affected on an SMP machine?"
      END IF
      DO i = 1, npow
         ncount = 10**i
         t2 = 0.0E0_dp
         IF (ncount .GT. nbufmax) CPABORT("")
         DO j = 1, 3**(npow - i)
            CALL comm%sync()
            t1 = MPI_WTIME()
            buffer2(1:ncount) = buffer1(1:ncount)
            t2 = t2 + MPI_WTIME() - t1 + threshold
         END DO
         CALL comm%max(t2, 0)
         IF (ionode .AND. output_unit > 0) THEN
            WRITE (output_unit, '(I9,A,F12.4,A)') 8*ncount, " Bytes ", (3**(npow - i))*ncount*8.0E-6_dp/t2, " MB/s"
         END IF
      END DO
      ! -------------------------------------------------------------------------------------------
      ! ------------------------------ first test point to point communication ---------------------
      ! -------------------------------------------------------------------------------------------
      CALL comm%sync()
      IF (ionode .AND. output_unit > 0) THEN
         WRITE (output_unit, *) "Testing truly point to point communication (i with j only)"
         WRITE (output_unit, *) "  is there some different connection between i and j (e.g. shared memory comm)"
      END IF
      ncount = 10**npow
      IF (ionode .AND. output_unit > 0) WRITE (output_unit, *) "For messages of ", ncount*8, " bytes"
      IF (ncount .GT. nbufmax) CPABORT("")
      DO itask = 0, nprocs - 1
         DO jtask = itask + 1, nprocs - 1
            CALL comm%sync()
            t1 = MPI_WTIME()
            IF (taskid .EQ. itask) THEN
               CALL comm%send(buffer1, jtask, itask*jtask)
            END IF
            IF (taskid .EQ. jtask) THEN
               source = itask
               tag = itask*jtask
               CALL comm%recv(buffer1, source, tag)
            END IF
            send_timings(itask, jtask) = MPI_WTIME() - t1 + threshold
         END DO
      END DO
      CALL comm%max(send_timings, 0)
      IF (ionode .AND. output_unit > 0) THEN
         DO itask = 0, nprocs - 1
            DO jtask = itask + 1, nprocs - 1
               WRITE (output_unit, '(I4,I4,F12.4,A)') itask, jtask, ncount*8.0E-6_dp/send_timings(itask, jtask), " MB/s"
            END DO
         END DO
      END IF
      CALL comm%sync()
      ! -------------------------------------------------------------------------------------------
      ! ------------------------------ second test point to point communication -------------------
      ! -------------------------------------------------------------------------------------------
      IF (ionode .AND. output_unit > 0) THEN
         WRITE (output_unit, *) "Testing all nearby point to point communication (0,1)(2,3)..."
         WRITE (output_unit, *) "    these could / should all be on the same shared memory node "
      END IF
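      ! even ranks send to taskid + 1 while odd ranks receive from taskid - 1,
      ! so each pair measures one-directional bandwidth between neighbouring ranks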
      DO i = 1, npow
         ncount = 10**i
         t2 = 0.0E0_dp
         IF (ncount .GT. nbufmax) CPABORT("")
         DO j = 1, 3**(npow - i)
            CALL comm%sync()
            t1 = MPI_WTIME()
            IF (MODULO(taskid, 2) == 0) THEN
               CALL comm%send(buffer1, taskid + 1, 0)
            ELSE
               source = taskid - 1
               tag = 0
               CALL comm%recv(buffer1, source, tag)
            END IF
            t2 = t2 + MPI_WTIME() - t1 + threshold
         END DO
         CALL comm%max(t2, 0)
         IF (ionode .AND. output_unit > 0) THEN
            WRITE (output_unit, '(I9,A,F12.4,A)') 8*ncount, " Bytes ", (3**(npow - i))*ncount*8.0E-6_dp/t2, " MB/s"
         END IF
      END DO
      CALL comm%sync()
      ! -------------------------------------------------------------------------------------------
      ! ------------------------------ third test point to point communication -------------------
      ! -------------------------------------------------------------------------------------------
      IF (ionode .AND. output_unit > 0) THEN
         WRITE (output_unit, *) "Testing all far point to point communication (0,nprocs/2),(1,nprocs/2+1),.."
         WRITE (output_unit, *) "    these could all be going over the network, and stress it a lot"
      END IF
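      ! ranks 0 .. nprocs/2 - 1 each send to the partner nprocs/2 away, so all
      ! nprocs/2 messages are likely to cross the node interconnect at once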
      DO i = 1, npow
         ncount = 10**i
         t2 = 0.0E0_dp
         IF (ncount .GT. nbufmax) CPABORT("")
         DO j = 1, 3**(npow - i)
            CALL comm%sync()
            t1 = MPI_WTIME()
            ! first half with partner
            IF (taskid .LT. nprocs/2) THEN
               CALL comm%send(buffer1, taskid + nprocs/2, 0)
            ELSE
               source = taskid - nprocs/2
               tag = 0
               CALL comm%recv(buffer1, source, tag)
            END IF
            t2 = t2 + MPI_WTIME() - t1 + threshold
         END DO
         CALL comm%max(t2, 0)
         IF (ionode .AND. output_unit > 0) THEN
            WRITE (output_unit, '(I9,A,F12.4,A)') 8*ncount, " Bytes ", (3**(npow - i))*ncount*8.0E-6_dp/t2, " MB/s"
         END IF
      END DO
      ! -------------------------------------------------------------------------------------------
      ! ------------------------------ test root to all broadcast               -------------------
      ! -------------------------------------------------------------------------------------------
      CALL comm%sync()
      IF (ionode .AND. output_unit > 0) THEN
         WRITE (output_unit, *) "Testing root to all broadcast "
         WRITE (output_unit, *) "    using trees at least ? "
      END IF
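      ! every rank receives the same ncount elements from rank 0; the reported
      ! rate counts the payload once, whatever the underlying bcast algorithm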
      DO i = 1, npow
         ncount = 10**i
         t2 = 0.0E0_dp
         IF (ncount .GT. nbufmax) CPABORT("")
         DO j = 1, 3**(npow - i)
            CALL comm%sync()
            t1 = MPI_WTIME()
            CALL comm%bcast(buffer1, 0)
            t2 = t2 + MPI_WTIME() - t1 + threshold
         END DO
         CALL comm%max(t2, 0)
         IF (ionode .AND. output_unit > 0) THEN
            WRITE (output_unit, '(I9,A,F12.4,A)') 8*ncount, " Bytes ", (3**(npow - i))*ncount*8.0E-6_dp/t2, " MB/s"
         END IF
      END DO
      ! -------------------------------------------------------------------------------------------
      ! ------------------------------ test parallel sum-like behavior          -------------------
      ! -------------------------------------------------------------------------------------------
      CALL comm%sync()
      IF (ionode .AND. output_unit > 0) WRITE (output_unit, *) "Test global summation (mpi_allreduce) "
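      ! buffer2 is refreshed before every call because comm%sum reduces in place;
      ! the rate is based on the local buffer size only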
      DO i = 1, npow
         ncount = 10**i
         t2 = 0.0E0_dp
         IF (ncount .GT. nbufmax) CPABORT("")
         DO j = 1, 3**(npow - i)
            buffer2(:) = buffer1
            CALL comm%sync()
            t1 = MPI_WTIME()
            CALL comm%sum(buffer2)
            t2 = t2 + MPI_WTIME() - t1 + threshold
         END DO
         CALL comm%max(t2, 0)
         IF (ionode .AND. output_unit > 0) THEN
            WRITE (output_unit, '(I9,A,F12.4,A)') 8*ncount, " Bytes ", (3**(npow - i))*ncount*8.0E-6_dp/t2, " MB/s"
         END IF
      END DO
      ! -------------------------------------------------------------------------------------------
      ! ------------------------------ test all to all communication            -------------------
      ! -------------------------------------------------------------------------------------------
      CALL comm%sync()
      IF (ionode .AND. output_unit > 0) THEN
         WRITE (output_unit, *) "Test all to all communication (mpi_alltoallv)"
         WRITE (output_unit, *) "    mpi/network getting confused ? "
      END IF
      DO i = 1, npow
         ncount = 10**i
         t2 = 0.0E0_dp
         IF (ncount .GT. nbufmax) CPABORT("")
         scount = ncount/nprocs
         rcount = ncount/nprocs
         DO j = 1, nprocs
            sdispl(j) = (j - 1)*(ncount/nprocs)
            rdispl(j) = (j - 1)*(ncount/nprocs)
         END DO
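         ! e.g. nprocs = 4, ncount = 1000: each rank exchanges 4 blocks of 250
         ! elements, at displacements 0, 250, 500 and 750 in the buffers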
         DO j = 1, 3**(npow - i)
            CALL comm%sync()
            t1 = MPI_WTIME()
            CALL comm%alltoall(buffer1, scount, sdispl, buffer2, rcount, rdispl)
            t2 = t2 + MPI_WTIME() - t1 + threshold
         END DO
         CALL comm%max(t2, 0)
         IF (ionode .AND. output_unit > 0) THEN
            WRITE (output_unit, '(I9,A,F12.4,A)') 8*(ncount/nprocs)*nprocs, " Bytes ", &
               (3**(npow - i))*(ncount/nprocs)*nprocs*8.0E-6_dp/t2, " MB/s"
         END IF
      END DO

      ! -------------------------------------------------------------------------------------------
      ! ------------------------------ other stuff                            ---------------------
      ! -------------------------------------------------------------------------------------------
      IF (ionode .AND. output_unit > 0) THEN
         WRITE (output_unit, *) " Clean tests completed "
         WRITE (output_unit, *) " Testing MPI_REDUCE_SCATTER"
      END IF
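      ! three variants of the same sum + scatter operation are timed below: the
      ! library MPI_REDUCE_SCATTER, a ring of shifts (MPI_SENDRECV_REPLACE), and
      ! a pairwise pre-sum followed by a stride-2 ring ("involved shift")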
      rcount = Nloc
      DO itests = 1, 3
         IF (ionode .AND. output_unit > 0) &
            WRITE (output_unit, *) "------------------------------- test ", itests, " ------------------------"
         ! *** reference ***
         DO j = 1, Nprocs
            DO i = 1, Nloc
               grid(i, j) = MODULO(i*j*taskid, itests)
            END DO
         END DO
         t1 = MPI_WTIME()
         CALL comm%mp_sum_scatter_dv(grid, lgrid, rcount)
         t2 = MPI_WTIME() - t1 + threshold
         CALL comm%max(t2)
         IF (ionode .AND. output_unit > 0) WRITE (output_unit, *) "MPI_REDUCE_SCATTER    ", t2
         ! *** simple shift ***
         DO j = 1, Nprocs
            DO i = 1, Nloc
               grid2(i, j) = MODULO(i*j*taskid, itests)
            END DO
         END DO
         t3 = MPI_WTIME()
         lgrid2 = 0.0E0_dp
         DO i = 1, Nprocs
            lgrid2(:) = lgrid2 + grid(:, MODULO(taskid - i, Nprocs) + 1)
            IF (i .EQ. nprocs) EXIT
            CALL comm%shift(lgrid2, 1)
         END DO
         t4 = MPI_WTIME() - t3 + threshold
         CALL comm%max(t4)
         maxdiff = MAXVAL(ABS(lgrid2 - lgrid))
         CALL comm%max(maxdiff)
         IF (ionode .AND. output_unit > 0) WRITE (output_unit, *) "MPI_SENDRECV_REPLACE  ", t4, maxdiff
         ! *** involved shift ***
         IF (MODULO(nprocs, 2) /= 0) CPABORT("")
         DO j = 1, Nprocs
            DO i = 1, Nloc
               grid3(i, j) = MODULO(i*j*taskid, itests)
            END DO
         END DO
         t3 = MPI_WTIME()
         ! first sum the grid in pairs (0,1),(2,3); these should be within an LPAR and fast
         ! 0 will only need parts 0,2,4,... correctly summed
         ! 1 will only need parts 1,3,5,... correctly summed
         ! *** could nicely be generalised ***
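         ! e.g. nprocs = 4: ranks 0/1 and 2/3 first exchange and sum complementary
         ! columns, then each even (odd) rank accumulates its slice via shifts of
         ! stride 2, i.e. around the ring 0 <-> 2 and the ring 1 <-> 3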
         IF (MODULO(taskid, 2) == 0) THEN
            partner = taskid + 1
            DO i = 1, Nprocs, 2 ! sum the full grid with the partner
               CALL comm%sendrecv(grid3(:, i + 1), partner, lgrid3, partner, 17)
               grid3(:, i) = grid3(:, i) + lgrid3(:)
            END DO
         ELSE
            partner = taskid - 1
            DO i = 1, Nprocs, 2
               CALL comm%sendrecv(grid3(:, i), partner, lgrid3, partner, 17)
               grid3(:, i + 1) = grid3(:, i + 1) + lgrid3(:)
            END DO
         END IF
         t4 = MPI_WTIME() - t3 + threshold
         ! now send a given buffer from 1 to 3 to 5 .. adding the right part of the data
         ! since we've summed in pairs, an lgrid only needs to pass through the even or the odd tasks
         left = MODULO(taskid - 2, Nprocs)
         right = MODULO(taskid + 2, Nprocs)
         t3 = MPI_WTIME()
         lgrid3 = 0.0E0_dp
         DO i = 1, Nprocs, 2
            lgrid3(:) = lgrid3 + grid3(:, MODULO(taskid - i - 1, Nprocs) + 1)
            IF (i .EQ. nprocs - 1) EXIT
            CALL comm%shift(lgrid3, 2)
         END DO
         t5 = MPI_WTIME() - t3 + threshold
         CALL comm%max(t4)
         CALL comm%max(t5)
         maxdiff = MAXVAL(ABS(lgrid3 - lgrid))
         CALL comm%max(maxdiff)
         IF (ionode .AND. output_unit > 0) WRITE (output_unit, *) "INVOLVED SHIFT        ", t4 + t5, "(", t4, ",", t5, ")", maxdiff
      END DO
      DEALLOCATE (rcount)
      DEALLOCATE (scount)
      DEALLOCATE (sdispl)
      DEALLOCATE (rdispl)
      DEALLOCATE (buffer1)
      DEALLOCATE (buffer2)
      DEALLOCATE (buffer3)
      DEALLOCATE (grid)
      DEALLOCATE (grid2)
      DEALLOCATE (grid3)
      DEALLOCATE (lgrid)
      DEALLOCATE (lgrid2)
      DEALLOCATE (lgrid3)
      DEALLOCATE (send_timings)
      DEALLOCATE (send_timings2)
#else
      MARK_USED(comm)
      MARK_USED(npow)
      IF (output_unit > 0) WRITE (output_unit, *) "No MPI tests for a serial program"
#endif
   END SUBROUTINE mpi_perf_test

END MODULE mp_perf_test
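! A minimal driver sketch (hypothetical, not part of this file): it assumes the
! message_passing module exposes mp_world_init and mp_world_finalize, as used by
! the CP2K entry point, to create and release the world communicator.
! PROGRAM mp_perf_test_driver
!    USE message_passing, ONLY: mp_comm_type, mp_world_init, mp_world_finalize
!    USE mp_perf_test, ONLY: mpi_perf_test
!    IMPLICIT NONE
!    TYPE(mp_comm_type) :: comm
!    CALL mp_world_init(comm)
!    ! npow = 4 times messages of 10**1 .. 10**4 elements; unit 6 is stdout
!    CALL mpi_perf_test(comm, npow=4, output_unit=6)
!    CALL mp_world_finalize()
! END PROGRAM mp_perf_test_driver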
