LCOV - code coverage report
Current view: top level - src/common - timings_report.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 88.2 % 161 142
Test Date: 2025-07-25 12:55:17 Functions: 100.0 % 4 4

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Timing routines for accounting
      10              : !> \par History
      11              : !>      02.2004 made a stacked version (of stacks...) [Joost VandeVondele]
      12              : !>      11.2004 storable timer_envs (for f77 interface) [fawzi]
      13              : !>      10.2005 binary search to speed up lookup in timeset [fawzi]
      14              : !>      12.2012 Complete rewrite based on dictionaries. [ole]
      15              : !>      01.2014 Collect statistics from all MPI ranks. [ole]
      16              : !> \author JGH
      17              : ! **************************************************************************************************
      18              : MODULE timings_report
      19              :    USE callgraph,                       ONLY: callgraph_item_type,&
      20              :                                               callgraph_items
      21              :    USE cp_files,                        ONLY: close_file,&
      22              :                                               open_file
      23              :    USE kinds,                           ONLY: default_string_length,&
      24              :                                               dp,&
      25              :                                               int_8
      26              :    USE list,                            ONLY: list_destroy,&
      27              :                                               list_get,&
      28              :                                               list_init,&
      29              :                                               list_isready,&
      30              :                                               list_pop,&
      31              :                                               list_push,&
      32              :                                               list_size
      33              :    USE list_routinereport,              ONLY: list_routinereport_type
      34              :    USE message_passing,                 ONLY: mp_para_env_type
      35              :    USE routine_map,                     ONLY: routine_map_get,&
      36              :                                               routine_map_haskey
      37              :    USE timings,                         ONLY: get_timer_env
      38              :    USE timings_base_type,               ONLY: call_stat_type,&
      39              :                                               routine_report_type,&
      40              :                                               routine_stat_type
      41              :    USE timings_types,                   ONLY: timer_env_type
      42              :    USE util,                            ONLY: sort
      43              : #include "../base/base_uses.f90"
      44              : 
      45              :    IMPLICIT NONE
      46              :    PRIVATE
      47              : 
      48              :    INTEGER, PUBLIC, PARAMETER :: cost_type_time = 17, cost_type_energy = 18
      49              : 
      50              :    PUBLIC :: timings_report_print, timings_report_callgraph
      51              : 
      52              : CONTAINS
      53              : 
      54              : ! **************************************************************************************************
      55              : !> \brief Print accumulated information on timers
      56              : !> \param iw ...
      57              : !> \param r_timings ...
      58              : !> \param sort_by_self_time ...
      59              : !> \param cost_type ...
      60              : !> \param report_maxloc ...
      61              : !> \param para_env is needed to collect statistics from other nodes.
      62              : !> \par History
      63              : !>      none
      64              : !> \author JGH
      65              : ! **************************************************************************************************
      66         9835 :    SUBROUTINE timings_report_print(iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
      67              :       INTEGER, INTENT(IN)                                :: iw
      68              :       REAL(KIND=dp), INTENT(IN)                          :: r_timings
      69              :       LOGICAL, INTENT(IN)                                :: sort_by_self_time
      70              :       INTEGER, INTENT(IN)                                :: cost_type
      71              :       LOGICAL, INTENT(IN)                                :: report_maxloc
      72              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
      73              : 
      74              :       TYPE(list_routinereport_type)                      :: reports
      75              :       TYPE(routine_report_type), POINTER                 :: r_report
      76              : 
      77         9835 :       CALL list_init(reports)
      78         9835 :       CALL collect_reports_from_ranks(reports, cost_type, para_env)
      79              : 
      80         9835 :       IF (list_size(reports) > 0 .AND. iw > 0) &
      81         5021 :          CALL print_reports(reports, iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
      82              : 
      83              :       ! deallocate reports
      84      3938912 :       DO WHILE (list_size(reports) > 0)
      85      3929077 :          r_report => list_pop(reports)
      86      3929077 :          DEALLOCATE (r_report)
      87              :       END DO
      88         9835 :       CALL list_destroy(reports)
      89              : 
      90         9835 :    END SUBROUTINE timings_report_print
      91              : 
      92              : ! **************************************************************************************************
      93              : !> \brief Collects the timing or energy reports from all MPI ranks.
      94              : !> \param reports ...
      95              : !> \param cost_type ...
      96              : !> \param para_env ...
      97              : !> \author Ole Schuett
      98              : ! **************************************************************************************************
      99         9835 :    SUBROUTINE collect_reports_from_ranks(reports, cost_type, para_env)
     100              :       TYPE(list_routinereport_type), INTENT(INOUT)       :: reports
     101              :       INTEGER, INTENT(IN)                                :: cost_type
     102              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
     103              : 
     104              :       CHARACTER(LEN=default_string_length)               :: routineN
     105              :       INTEGER                                            :: local_routine_id, sending_rank
     106         9835 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: collected
     107              :       REAL(KIND=dp)                                      :: foobar
     108              :       REAL(KIND=dp), DIMENSION(2)                        :: dbuf
     109              :       TYPE(routine_report_type), POINTER                 :: r_report
     110              :       TYPE(routine_stat_type), POINTER                   :: r_stat
     111              :       TYPE(timer_env_type), POINTER                      :: timer_env
     112              : 
     113         9835 :       NULLIFY (r_stat, r_report, timer_env)
     114         9835 :       IF (.NOT. list_isready(reports)) &
     115            0 :          CPABORT("BUG")
     116              : 
     117         9835 :       timer_env => get_timer_env()
     118              : 
     119              :       ! make sure all functions have been called so that list_size(timer_env%routine_stats)
     120              :       ! and the actual dictionary are consistent in the loop below, preventing out of bounds.
     121              :       ! this hack makes sure they are called before
     122         9835 :       routineN = ""
     123         9835 :       CALL para_env%bcast(routineN, 0)
     124         9835 :       sending_rank = 0
     125         9835 :       CALL para_env%max(sending_rank)
     126         9835 :       CALL para_env%sum(sending_rank)
     127         9835 :       foobar = 0.0_dp
     128         9835 :       CALL para_env%max(foobar)
     129         9835 :       dbuf = 0.0_dp
     130         9835 :       CALL para_env%maxloc(dbuf)
     131         9835 :       CALL para_env%sum(foobar)
     132              :       ! end hack
     133              : 
     134              :       ! Array collected is used as a bit field.
     135              :       ! It's of type integer in order to use the convenient MINLOC routine.
     136        29505 :       ALLOCATE (collected(list_size(timer_env%routine_stats)))
     137      3924318 :       collected(:) = 0
     138              : 
     139      3929077 :       DO
     140              :          ! does any rank have uncollected stats?
     141      3938912 :          sending_rank = -1
     142    803347847 :          IF (.NOT. ALL(collected == 1)) sending_rank = para_env%mepos
     143      3938912 :          CALL para_env%max(sending_rank)
     144      3938912 :          IF (sending_rank < 0) EXIT ! every rank got all routines collected
     145      3929077 :          IF (sending_rank == para_env%mepos) THEN
     146    952783500 :             local_routine_id = MINLOC(collected, dim=1)
     147      1987513 :             r_stat => list_get(timer_env%routine_stats, local_routine_id)
     148      1987513 :             routineN = r_stat%routineN
     149              :          END IF
     150      3929077 :          CALL para_env%bcast(routineN, sending_rank)
     151              : 
     152              :          ! Create new report for routineN
     153      3929077 :          ALLOCATE (r_report)
     154      3929077 :          CALL list_push(reports, r_report)
     155      3929077 :          r_report%routineN = routineN
     156              : 
     157              :          ! If routineN was called on local node, add local stats
     158      3929077 :          IF (routine_map_haskey(timer_env%routine_names, routineN)) THEN
     159      3914483 :             local_routine_id = routine_map_get(timer_env%routine_names, routineN)
     160      3914483 :             collected(local_routine_id) = 1
     161      3914483 :             r_stat => list_get(timer_env%routine_stats, local_routine_id)
     162      3914483 :             r_report%max_total_calls = r_stat%total_calls
     163      3914483 :             r_report%sum_total_calls = r_stat%total_calls
     164      3914483 :             r_report%sum_stackdepth = r_stat%stackdepth_accu
     165      3914483 :             SELECT CASE (cost_type)
     166              :             CASE (cost_type_energy)
     167            0 :                r_report%max_icost = r_stat%incl_energy_accu
     168            0 :                r_report%sum_icost = r_stat%incl_energy_accu
     169            0 :                r_report%max_ecost = r_stat%excl_energy_accu
     170            0 :                r_report%sum_ecost = r_stat%excl_energy_accu
     171              :             CASE (cost_type_time)
     172      3914483 :                r_report%max_icost = r_stat%incl_walltime_accu
     173      3914483 :                r_report%sum_icost = r_stat%incl_walltime_accu
     174      3914483 :                r_report%max_ecost = r_stat%excl_walltime_accu
     175      3914483 :                r_report%sum_ecost = r_stat%excl_walltime_accu
     176              :             CASE DEFAULT
     177      3914483 :                CPABORT("BUG")
     178              :             END SELECT
     179              :          END IF
     180              : 
     181              :          ! collect stats of routineN via MPI
     182      3929077 :          CALL para_env%max(r_report%max_total_calls)
     183      3929077 :          CALL para_env%sum(r_report%sum_total_calls)
     184      3929077 :          CALL para_env%sum(r_report%sum_stackdepth)
     185              : 
     186              :          ! get value and rank of the maximum inclusive cost
     187     11787231 :          dbuf = (/r_report%max_icost, REAL(para_env%mepos, KIND=dp)/)
     188      3929077 :          CALL para_env%maxloc(dbuf)
     189      3929077 :          r_report%max_icost = dbuf(1)
     190      3929077 :          r_report%max_irank = INT(dbuf(2))
     191              : 
     192      3929077 :          CALL para_env%sum(r_report%sum_icost)
     193              : 
     194              :          ! get value and rank of the maximum exclusive cost
     195     11787231 :          dbuf = (/r_report%max_ecost, REAL(para_env%mepos, KIND=dp)/)
     196      3929077 :          CALL para_env%maxloc(dbuf)
     197      3929077 :          r_report%max_ecost = dbuf(1)
     198      3929077 :          r_report%max_erank = INT(dbuf(2))
     199              : 
     200      3938912 :          CALL para_env%sum(r_report%sum_ecost)
     201              :       END DO
     202              : 
     203         9835 :    END SUBROUTINE collect_reports_from_ranks
     204              : 
     205              : ! **************************************************************************************************
     206              : !> \brief Print the collected reports
     207              : !> \param reports ...
     208              : !> \param iw ...
     209              : !> \param threshold ...
     210              : !> \param sort_by_exclusiv_cost ...
     211              : !> \param cost_type ...
     212              : !> \param report_maxloc ...
     213              : !> \param para_env ...
     214              : !> \par History
     215              : !>      01.2014 Refactored (Ole Schuett)
     216              : !> \author JGH
     217              : ! **************************************************************************************************
     218         5021 :    SUBROUTINE print_reports(reports, iw, threshold, sort_by_exclusiv_cost, cost_type, report_maxloc, para_env)
     219              :       TYPE(list_routinereport_type), INTENT(IN)          :: reports
     220              :       INTEGER, INTENT(IN)                                :: iw
     221              :       REAL(KIND=dp), INTENT(IN)                          :: threshold
     222              :       LOGICAL, INTENT(IN)                                :: sort_by_exclusiv_cost
     223              :       INTEGER, INTENT(IN)                                :: cost_type
     224              :       LOGICAL, INTENT(IN)                                :: report_maxloc
     225              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
     226              : 
     227              :       CHARACTER(LEN=4)                                   :: label
     228              :       CHARACTER(LEN=default_string_length)               :: fmt, title
     229              :       INTEGER                                            :: decimals, i, j, num_routines
     230         5021 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indices
     231              :       REAL(KIND=dp)                                      :: asd, maxcost, mincost
     232         5021 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: max_costs
     233              :       TYPE(routine_report_type), POINTER                 :: r_report_i, r_report_j
     234              : 
     235         5021 :       NULLIFY (r_report_i, r_report_j)
     236         5021 :       IF (.NOT. list_isready(reports)) &
     237            0 :          CPABORT("BUG")
     238              : 
     239              :       ! are we printing timing or energy ?
     240         5021 :       SELECT CASE (cost_type)
     241              :       CASE (cost_type_energy)
     242            0 :          title = "E N E R G Y"
     243            0 :          label = "ENER"
     244              :       CASE (cost_type_time)
     245         5021 :          title = "T I M I N G"
     246         5021 :          label = "TIME"
     247              :       CASE DEFAULT
     248         5021 :          CPABORT("BUG")
     249              :       END SELECT
     250              : 
     251              :       ! write banner
     252         5021 :       WRITE (UNIT=iw, FMT="(/,T2,A)") REPEAT("-", 79)
     253         5021 :       WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
     254         5021 :       WRITE (UNIT=iw, FMT="(T2,A,T35,A,T80,A)") "-", TRIM(title), "-"
     255         5021 :       WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
     256         5021 :       WRITE (UNIT=iw, FMT="(T2,A)") REPEAT("-", 79)
     257         5021 :       IF (report_maxloc) THEN
     258              :          WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18,A8)") &
     259            0 :             "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label, "MAXRANK"
     260              :       ELSE
     261              :          WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18)") &
     262         5021 :             "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label
     263              :       END IF
     264              : 
     265              :       WRITE (UNIT=iw, FMT="(T33,A)") &
     266         5021 :          "MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM"
     267              : 
     268              :       ! sort statistics
     269         5021 :       num_routines = list_size(reports)
     270        15063 :       ALLOCATE (max_costs(num_routines))
     271      1992534 :       DO i = 1, num_routines
     272      1987513 :          r_report_i => list_get(reports, i)
     273      1992534 :          IF (sort_by_exclusiv_cost) THEN
     274         1442 :             max_costs(i) = r_report_i%max_ecost
     275              :          ELSE
     276      1986071 :             max_costs(i) = r_report_i%max_icost
     277              :          END IF
     278              :       END DO
     279        15063 :       ALLOCATE (indices(num_routines))
     280         5021 :       CALL sort(max_costs, num_routines, indices)
     281              : 
     282      1997555 :       maxcost = MAXVAL(max_costs)
     283         5021 :       mincost = maxcost*threshold
     284              : 
     285              :       ! adjust fmt dynamically based on the max walltime.
     286              :       ! few clocks have more than 3 digits resolution, so stop there
     287         5021 :       decimals = 3
     288         5021 :       IF (maxcost >= 10000) decimals = 2
     289            0 :       IF (maxcost >= 100000) decimals = 1
     290         5021 :       IF (maxcost >= 1000000) decimals = 0
     291         5021 :       IF (report_maxloc) THEN
     292              :          WRITE (UNIT=fmt, FMT="(A,I0,A)") &
     293            0 :             "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "),I8)"
     294              :       ELSE
     295              :          WRITE (UNIT=fmt, FMT="(A,I0,A)") &
     296         5021 :             "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "))"
     297              :       END IF
     298              : 
     299              :       !write output
     300      1992534 :       DO i = num_routines, 1, -1
     301      1992534 :          IF (max_costs(i) >= mincost) THEN
     302       263480 :             j = indices(i)
     303       263480 :             r_report_j => list_get(reports, j)
     304              :             ! average stack depth
     305              :             asd = REAL(r_report_j%sum_stackdepth, KIND=dp)/ &
     306       263480 :                   REAL(MAX(1_int_8, r_report_j%sum_total_calls), KIND=dp)
     307       263480 :             IF (report_maxloc) THEN
     308              :                WRITE (UNIT=iw, FMT=fmt) &
     309            0 :                   ADJUSTL(r_report_j%routineN(1:31)), &
     310            0 :                   r_report_j%max_total_calls, &
     311            0 :                   asd, &
     312            0 :                   r_report_j%sum_ecost/para_env%num_pe, &
     313            0 :                   r_report_j%max_ecost, &
     314            0 :                   r_report_j%sum_icost/para_env%num_pe, &
     315            0 :                   r_report_j%max_icost, &
     316            0 :                   r_report_j%max_erank
     317              :             ELSE
     318              :                WRITE (UNIT=iw, FMT=fmt) &
     319       263480 :                   ADJUSTL(r_report_j%routineN(1:31)), &
     320       263480 :                   r_report_j%max_total_calls, &
     321       263480 :                   asd, &
     322       263480 :                   r_report_j%sum_ecost/para_env%num_pe, &
     323       263480 :                   r_report_j%max_ecost, &
     324       263480 :                   r_report_j%sum_icost/para_env%num_pe, &
     325       526960 :                   r_report_j%max_icost
     326              :             END IF
     327              :          END IF
     328              :       END DO
     329         5021 :       WRITE (UNIT=iw, FMT="(T2,A,/)") REPEAT("-", 79)
     330              : 
     331         5021 :    END SUBROUTINE print_reports
     332              : 
     333              : ! **************************************************************************************************
     334              : !> \brief Write accumulated callgraph information as cachegrind-file.
     335              : !> http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
     336              : !> \param filename ...
     337              : !> \par History
     338              : !>     12.2012  initial version[ole]
     339              : !> \author Ole Schuett
     340              : ! **************************************************************************************************
     341            1 :    SUBROUTINE timings_report_callgraph(filename)
     342              :       CHARACTER(len=*), INTENT(in)                       :: filename
     343              : 
     344              :       INTEGER, PARAMETER                                 :: E = 1000, T = 100000
     345              : 
     346              :       INTEGER                                            :: i, unit
     347              :       TYPE(call_stat_type), POINTER                      :: c_stat
     348            1 :       TYPE(callgraph_item_type), DIMENSION(:), POINTER   :: ct_items
     349              :       TYPE(routine_stat_type), POINTER                   :: r_stat
     350              :       TYPE(timer_env_type), POINTER                      :: timer_env
     351              : 
     352              :       CALL open_file(file_name=filename, file_status="REPLACE", file_action="WRITE", &
     353            1 :                      file_form="FORMATTED", unit_number=unit)
     354            1 :       timer_env => get_timer_env()
     355              : 
     356              :       ! use outermost routine as total runtime
     357            1 :       r_stat => list_get(timer_env%routine_stats, 1)
     358            1 :       WRITE (UNIT=unit, FMT="(A)") "events: Walltime Energy"
     359            1 :       WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "summary: ", &
     360            1 :          INT(T*r_stat%incl_walltime_accu, KIND=int_8), &
     361            2 :          INT(E*r_stat%incl_energy_accu, KIND=int_8)
     362              : 
     363          453 :       DO i = 1, list_size(timer_env%routine_stats)
     364          452 :          r_stat => list_get(timer_env%routine_stats, i)
     365          452 :          WRITE (UNIT=unit, FMT="(A,I0,A,A)") "fn=(", r_stat%routine_id, ") ", r_stat%routineN
     366          452 :          WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
     367          452 :             INT(T*r_stat%excl_walltime_accu, KIND=int_8), &
     368          905 :             INT(E*r_stat%excl_energy_accu, KIND=int_8)
     369              :       END DO
     370              : 
     371            1 :       ct_items => callgraph_items(timer_env%callgraph)
     372          792 :       DO i = 1, SIZE(ct_items)
     373          791 :          c_stat => ct_items(i)%value
     374          791 :          WRITE (UNIT=unit, FMT="(A,I0,A)") "fn=(", ct_items(i)%key(1), ")"
     375          791 :          WRITE (UNIT=unit, FMT="(A,I0,A)") "cfn=(", ct_items(i)%key(2), ")"
     376          791 :          WRITE (UNIT=unit, FMT="(A,I0,A)") "calls=", c_stat%total_calls, " 1"
     377          791 :          WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
     378          791 :             INT(T*c_stat%incl_walltime_accu, KIND=int_8), &
     379         1583 :             INT(E*c_stat%incl_energy_accu, KIND=int_8)
     380              :       END DO
     381            1 :       DEALLOCATE (ct_items)
     382              : 
     383            1 :       CALL close_file(unit_number=unit, file_status="KEEP")
     384              : 
     385            1 :    END SUBROUTINE timings_report_callgraph
     386              : END MODULE timings_report
     387              : 
        

Generated by: LCOV version 2.0-1