LCOV - code coverage report
Current view: top level - src/common - timings_report.F (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 88.1 % 160 141
Test Date: 2025-12-04 06:27:48 Functions: 100.0 % 4 4

            Line data    Source code
       1              : !--------------------------------------------------------------------------------------------------!
       2              : !   CP2K: A general program to perform molecular dynamics simulations                              !
       3              : !   Copyright 2000-2025 CP2K developers group <https://cp2k.org>                                   !
       4              : !                                                                                                  !
       5              : !   SPDX-License-Identifier: GPL-2.0-or-later                                                      !
       6              : !--------------------------------------------------------------------------------------------------!
       7              : 
       8              : ! **************************************************************************************************
       9              : !> \brief Timing routines for accounting
      10              : !> \par History
      11              : !>      02.2004 made a stacked version (of stacks...) [Joost VandeVondele]
      12              : !>      11.2004 storable timer_envs (for f77 interface) [fawzi]
      13              : !>      10.2005 binary search to speed up lookup in timeset [fawzi]
      14              : !>      12.2012 Complete rewrite based on dictionaries. [ole]
      15              : !>      01.2014 Collect statistics from all MPI ranks. [ole]
      16              : !> \author JGH
      17              : ! **************************************************************************************************
      18              : MODULE timings_report
      19              :    USE callgraph,                       ONLY: callgraph_item_type,&
      20              :                                               callgraph_items
      21              :    USE cp_files,                        ONLY: close_file,&
      22              :                                               open_file
      23              :    USE kinds,                           ONLY: default_string_length,&
      24              :                                               dp,&
      25              :                                               int_8
      26              :    USE list,                            ONLY: list_destroy,&
      27              :                                               list_get,&
      28              :                                               list_init,&
      29              :                                               list_isready,&
      30              :                                               list_pop,&
      31              :                                               list_push,&
      32              :                                               list_size
      33              :    USE list_routinereport,              ONLY: list_routinereport_type
      34              :    USE message_passing,                 ONLY: mp_para_env_type
      35              :    USE routine_map,                     ONLY: routine_map_get,&
      36              :                                               routine_map_haskey
      37              :    USE timings,                         ONLY: get_timer_env
      38              :    USE timings_base_type,               ONLY: call_stat_type,&
      39              :                                               routine_report_type,&
      40              :                                               routine_stat_type
      41              :    USE timings_types,                   ONLY: timer_env_type
      42              :    USE util,                            ONLY: sort
      43              : #include "../base/base_uses.f90"
      44              : 
      45              :    IMPLICIT NONE
      46              :    PRIVATE
      47              : 
      48              :    INTEGER, PUBLIC, PARAMETER :: cost_type_time = 17, cost_type_energy = 18
      49              : 
      50              :    PUBLIC :: timings_report_print, timings_report_callgraph
      51              : 
      52              : CONTAINS
      53              : 
      54              : ! **************************************************************************************************
      55              : !> \brief Print accumulated information on timers
      56              : !> \param iw ...
      57              : !> \param r_timings ...
      58              : !> \param sort_by_self_time ...
      59              : !> \param cost_type ...
      60              : !> \param report_maxloc ...
      61              : !> \param para_env is needed to collect statistics from other nodes.
      62              : !> \par History
      63              : !>      none
      64              : !> \author JGH
      65              : ! **************************************************************************************************
      66         9881 :    SUBROUTINE timings_report_print(iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
      67              :       INTEGER, INTENT(IN)                                :: iw
      68              :       REAL(KIND=dp), INTENT(IN)                          :: r_timings
      69              :       LOGICAL, INTENT(IN)                                :: sort_by_self_time
      70              :       INTEGER, INTENT(IN)                                :: cost_type
      71              :       LOGICAL, INTENT(IN)                                :: report_maxloc
      72              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
      73              : 
      74              :       TYPE(list_routinereport_type)                      :: reports
      75              :       TYPE(routine_report_type), POINTER                 :: r_report
      76              : 
      77         9881 :       CALL list_init(reports)
      78         9881 :       CALL collect_reports_from_ranks(reports, cost_type, para_env)
      79              : 
      80         9881 :       IF (list_size(reports) > 0 .AND. iw > 0) &
      81         5044 :          CALL print_reports(reports, iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
      82              : 
      83              :       ! deallocate reports
      84      3968324 :       DO WHILE (list_size(reports) > 0)
      85      3958443 :          r_report => list_pop(reports)
      86      3958443 :          DEALLOCATE (r_report)
      87              :       END DO
      88         9881 :       CALL list_destroy(reports)
      89              : 
      90         9881 :    END SUBROUTINE timings_report_print
      91              : 
      92              : ! **************************************************************************************************
      93              : !> \brief Collects the timing or energy reports from all MPI ranks.
      94              : !> \param reports ...
      95              : !> \param cost_type ...
      96              : !> \param para_env ...
      97              : !> \author Ole Schuett
      98              : ! **************************************************************************************************
      99         9881 :    SUBROUTINE collect_reports_from_ranks(reports, cost_type, para_env)
     100              :       TYPE(list_routinereport_type), INTENT(INOUT)       :: reports
     101              :       INTEGER, INTENT(IN)                                :: cost_type
     102              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
     103              : 
     104              :       CHARACTER(LEN=default_string_length)               :: routineN
     105              :       INTEGER                                            :: local_routine_id, sending_rank
     106         9881 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: collected
     107              :       REAL(KIND=dp)                                      :: foobar
     108              :       REAL(KIND=dp), DIMENSION(2)                        :: dbuf
     109              :       TYPE(routine_report_type), POINTER                 :: r_report
     110              :       TYPE(routine_stat_type), POINTER                   :: r_stat
     111              :       TYPE(timer_env_type), POINTER                      :: timer_env
     112              : 
     113         9881 :       NULLIFY (r_stat, r_report, timer_env)
     114         9881 :       IF (.NOT. list_isready(reports)) &
     115            0 :          CPABORT("BUG")
     116              : 
     117         9881 :       timer_env => get_timer_env()
     118              : 
     119              :       ! make sure all functions have been called so that list_size(timer_env%routine_stats)
     120              :       ! and the actual dictionary are consistent in the loop below, preventing out of bounds.
     121              :       ! this hack makes sure they are called before
     122         9881 :       routineN = ""
     123         9881 :       CALL para_env%bcast(routineN, 0)
     124         9881 :       sending_rank = 0
     125         9881 :       CALL para_env%max(sending_rank)
     126         9881 :       CALL para_env%sum(sending_rank)
     127         9881 :       foobar = 0.0_dp
     128         9881 :       CALL para_env%max(foobar)
     129         9881 :       dbuf = 0.0_dp
     130         9881 :       CALL para_env%maxloc(dbuf)
     131         9881 :       CALL para_env%sum(foobar)
     132              :       ! end hack
     133              : 
     134              :       ! Array collected is used as a bit field.
     135              :       ! It's of type integer in order to use the convenient MINLOC routine.
     136      3973292 :       ALLOCATE (collected(list_size(timer_env%routine_stats)), SOURCE=0)
     137              : 
     138      3958443 :       DO
     139              :          ! does any rank have uncollected stats?
     140      3968324 :          sending_rank = -1
     141    812465790 :          IF (.NOT. ALL(collected == 1)) sending_rank = para_env%mepos
     142      3968324 :          CALL para_env%max(sending_rank)
     143      3968324 :          IF (sending_rank < 0) EXIT ! every rank got all routines collected
     144      3958443 :          IF (sending_rank == para_env%mepos) THEN
     145    963399605 :             local_routine_id = MINLOC(collected, dim=1)
     146      2002232 :             r_stat => list_get(timer_env%routine_stats, local_routine_id)
     147      2002232 :             routineN = r_stat%routineN
     148              :          END IF
     149      3958443 :          CALL para_env%bcast(routineN, sending_rank)
     150              : 
     151              :          ! Create new report for routineN
     152      3958443 :          ALLOCATE (r_report)
     153      3958443 :          CALL list_push(reports, r_report)
     154      3958443 :          r_report%routineN = routineN
     155              : 
     156              :          ! If routineN was called on local node, add local stats
     157      3958443 :          IF (routine_map_haskey(timer_env%routine_names, routineN)) THEN
     158      3943649 :             local_routine_id = routine_map_get(timer_env%routine_names, routineN)
     159      3943649 :             collected(local_routine_id) = 1
     160      3943649 :             r_stat => list_get(timer_env%routine_stats, local_routine_id)
     161      3943649 :             r_report%max_total_calls = r_stat%total_calls
     162      3943649 :             r_report%sum_total_calls = r_stat%total_calls
     163      3943649 :             r_report%sum_stackdepth = r_stat%stackdepth_accu
     164      3943649 :             SELECT CASE (cost_type)
     165              :             CASE (cost_type_energy)
     166            0 :                r_report%max_icost = r_stat%incl_energy_accu
     167            0 :                r_report%sum_icost = r_stat%incl_energy_accu
     168            0 :                r_report%max_ecost = r_stat%excl_energy_accu
     169            0 :                r_report%sum_ecost = r_stat%excl_energy_accu
     170              :             CASE (cost_type_time)
     171      3943649 :                r_report%max_icost = r_stat%incl_walltime_accu
     172      3943649 :                r_report%sum_icost = r_stat%incl_walltime_accu
     173      3943649 :                r_report%max_ecost = r_stat%excl_walltime_accu
     174      3943649 :                r_report%sum_ecost = r_stat%excl_walltime_accu
     175              :             CASE DEFAULT
     176      3943649 :                CPABORT("BUG")
     177              :             END SELECT
     178              :          END IF
     179              : 
     180              :          ! collect stats of routineN via MPI
     181      3958443 :          CALL para_env%max(r_report%max_total_calls)
     182      3958443 :          CALL para_env%sum(r_report%sum_total_calls)
     183      3958443 :          CALL para_env%sum(r_report%sum_stackdepth)
     184              : 
     185              :          ! get value and rank of the maximum inclusive cost
     186     11875329 :          dbuf = [r_report%max_icost, REAL(para_env%mepos, KIND=dp)]
     187      3958443 :          CALL para_env%maxloc(dbuf)
     188      3958443 :          r_report%max_icost = dbuf(1)
     189      3958443 :          r_report%max_irank = INT(dbuf(2))
     190              : 
     191      3958443 :          CALL para_env%sum(r_report%sum_icost)
     192              : 
     193              :          ! get value and rank of the maximum exclusive cost
     194     11875329 :          dbuf = [r_report%max_ecost, REAL(para_env%mepos, KIND=dp)]
     195      3958443 :          CALL para_env%maxloc(dbuf)
     196      3958443 :          r_report%max_ecost = dbuf(1)
     197      3958443 :          r_report%max_erank = INT(dbuf(2))
     198              : 
     199      3968324 :          CALL para_env%sum(r_report%sum_ecost)
     200              :       END DO
     201              : 
     202         9881 :    END SUBROUTINE collect_reports_from_ranks
     203              : 
     204              : ! **************************************************************************************************
     205              : !> \brief Print the collected reports
     206              : !> \param reports ...
     207              : !> \param iw ...
     208              : !> \param threshold ...
     209              : !> \param sort_by_exclusiv_cost ...
     210              : !> \param cost_type ...
     211              : !> \param report_maxloc ...
     212              : !> \param para_env ...
     213              : !> \par History
     214              : !>      01.2014 Refactored (Ole Schuett)
     215              : !> \author JGH
     216              : ! **************************************************************************************************
     217         5044 :    SUBROUTINE print_reports(reports, iw, threshold, sort_by_exclusiv_cost, cost_type, report_maxloc, para_env)
     218              :       TYPE(list_routinereport_type), INTENT(IN)          :: reports
     219              :       INTEGER, INTENT(IN)                                :: iw
     220              :       REAL(KIND=dp), INTENT(IN)                          :: threshold
     221              :       LOGICAL, INTENT(IN)                                :: sort_by_exclusiv_cost
     222              :       INTEGER, INTENT(IN)                                :: cost_type
     223              :       LOGICAL, INTENT(IN)                                :: report_maxloc
     224              :       TYPE(mp_para_env_type), INTENT(IN)                 :: para_env
     225              : 
     226              :       CHARACTER(LEN=4)                                   :: label
     227              :       CHARACTER(LEN=default_string_length)               :: fmt, title
     228              :       INTEGER                                            :: decimals, i, j, num_routines
     229         5044 :       INTEGER, ALLOCATABLE, DIMENSION(:)                 :: indices
     230              :       REAL(KIND=dp)                                      :: asd, maxcost, mincost
     231         5044 :       REAL(KIND=dp), ALLOCATABLE, DIMENSION(:)           :: max_costs
     232              :       TYPE(routine_report_type), POINTER                 :: r_report_i, r_report_j
     233              : 
     234         5044 :       NULLIFY (r_report_i, r_report_j)
     235         5044 :       IF (.NOT. list_isready(reports)) &
     236            0 :          CPABORT("BUG")
     237              : 
     238              :       ! are we printing timing or energy ?
     239         5044 :       SELECT CASE (cost_type)
     240              :       CASE (cost_type_energy)
     241            0 :          title = "E N E R G Y"
     242            0 :          label = "ENER"
     243              :       CASE (cost_type_time)
     244         5044 :          title = "T I M I N G"
     245         5044 :          label = "TIME"
     246              :       CASE DEFAULT
     247         5044 :          CPABORT("BUG")
     248              :       END SELECT
     249              : 
     250              :       ! write banner
     251         5044 :       WRITE (UNIT=iw, FMT="(/,T2,A)") REPEAT("-", 79)
     252         5044 :       WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
     253         5044 :       WRITE (UNIT=iw, FMT="(T2,A,T35,A,T80,A)") "-", TRIM(title), "-"
     254         5044 :       WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
     255         5044 :       WRITE (UNIT=iw, FMT="(T2,A)") REPEAT("-", 79)
     256         5044 :       IF (report_maxloc) THEN
     257              :          WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18,A8)") &
     258            0 :             "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label, "MAXRANK"
     259              :       ELSE
     260              :          WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18)") &
     261         5044 :             "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label
     262              :       END IF
     263              : 
     264              :       WRITE (UNIT=iw, FMT="(T33,A)") &
     265         5044 :          "MAXIMUM       AVERAGE  MAXIMUM  AVERAGE  MAXIMUM"
     266              : 
     267              :       ! sort statistics
     268         5044 :       num_routines = list_size(reports)
     269        15132 :       ALLOCATE (max_costs(num_routines))
     270      2007276 :       DO i = 1, num_routines
     271      2002232 :          r_report_i => list_get(reports, i)
     272      2007276 :          IF (sort_by_exclusiv_cost) THEN
     273         1436 :             max_costs(i) = r_report_i%max_ecost
     274              :          ELSE
     275      2000796 :             max_costs(i) = r_report_i%max_icost
     276              :          END IF
     277              :       END DO
     278        15132 :       ALLOCATE (indices(num_routines))
     279         5044 :       CALL sort(max_costs, num_routines, indices)
     280              : 
     281      2012320 :       maxcost = MAXVAL(max_costs)
     282         5044 :       mincost = maxcost*threshold
     283              : 
     284              :       ! adjust fmt dynamically based on the max walltime.
     285              :       ! few clocks have more than 3 digits resolution, so stop there
     286         5044 :       decimals = 3
     287         5044 :       IF (maxcost >= 10000) decimals = 2
     288            0 :       IF (maxcost >= 100000) decimals = 1
     289         5044 :       IF (maxcost >= 1000000) decimals = 0
     290         5044 :       IF (report_maxloc) THEN
     291              :          WRITE (UNIT=fmt, FMT="(A,I0,A)") &
     292            0 :             "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "),I8)"
     293              :       ELSE
     294              :          WRITE (UNIT=fmt, FMT="(A,I0,A)") &
     295         5044 :             "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "))"
     296              :       END IF
     297              : 
     298              :       !write output
     299      2007276 :       DO i = num_routines, 1, -1
     300      2007276 :          IF (max_costs(i) >= mincost) THEN
     301       264448 :             j = indices(i)
     302       264448 :             r_report_j => list_get(reports, j)
     303              :             ! average stack depth
     304              :             asd = REAL(r_report_j%sum_stackdepth, KIND=dp)/ &
     305       264448 :                   REAL(MAX(1_int_8, r_report_j%sum_total_calls), KIND=dp)
     306       264448 :             IF (report_maxloc) THEN
     307              :                WRITE (UNIT=iw, FMT=fmt) &
     308            0 :                   ADJUSTL(r_report_j%routineN(1:31)), &
     309            0 :                   r_report_j%max_total_calls, &
     310            0 :                   asd, &
     311            0 :                   r_report_j%sum_ecost/para_env%num_pe, &
     312            0 :                   r_report_j%max_ecost, &
     313            0 :                   r_report_j%sum_icost/para_env%num_pe, &
     314            0 :                   r_report_j%max_icost, &
     315            0 :                   r_report_j%max_erank
     316              :             ELSE
     317              :                WRITE (UNIT=iw, FMT=fmt) &
     318       264448 :                   ADJUSTL(r_report_j%routineN(1:31)), &
     319       264448 :                   r_report_j%max_total_calls, &
     320       264448 :                   asd, &
     321       264448 :                   r_report_j%sum_ecost/para_env%num_pe, &
     322       264448 :                   r_report_j%max_ecost, &
     323       264448 :                   r_report_j%sum_icost/para_env%num_pe, &
     324       528896 :                   r_report_j%max_icost
     325              :             END IF
     326              :          END IF
     327              :       END DO
     328         5044 :       WRITE (UNIT=iw, FMT="(T2,A,/)") REPEAT("-", 79)
     329              : 
     330         5044 :    END SUBROUTINE print_reports
     331              : 
     332              : ! **************************************************************************************************
     333              : !> \brief Write accumulated callgraph information as cachegrind-file.
     334              : !> http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
     335              : !> \param filename ...
     336              : !> \par History
     337              : !>     12.2012  initial version[ole]
     338              : !> \author Ole Schuett
     339              : ! **************************************************************************************************
     340            1 :    SUBROUTINE timings_report_callgraph(filename)
     341              :       CHARACTER(len=*), INTENT(in)                       :: filename
     342              : 
     343              :       INTEGER, PARAMETER                                 :: E = 1000, T = 100000
     344              : 
     345              :       INTEGER                                            :: i, unit
     346              :       TYPE(call_stat_type), POINTER                      :: c_stat
     347            1 :       TYPE(callgraph_item_type), DIMENSION(:), POINTER   :: ct_items
     348              :       TYPE(routine_stat_type), POINTER                   :: r_stat
     349              :       TYPE(timer_env_type), POINTER                      :: timer_env
     350              : 
     351              :       CALL open_file(file_name=filename, file_status="REPLACE", file_action="WRITE", &
     352            1 :                      file_form="FORMATTED", unit_number=unit)
     353            1 :       timer_env => get_timer_env()
     354              : 
     355              :       ! use outermost routine as total runtime
     356            1 :       r_stat => list_get(timer_env%routine_stats, 1)
     357            1 :       WRITE (UNIT=unit, FMT="(A)") "events: Walltime Energy"
     358            1 :       WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "summary: ", &
     359            1 :          INT(T*r_stat%incl_walltime_accu, KIND=int_8), &
     360            2 :          INT(E*r_stat%incl_energy_accu, KIND=int_8)
     361              : 
     362          452 :       DO i = 1, list_size(timer_env%routine_stats)
     363          451 :          r_stat => list_get(timer_env%routine_stats, i)
     364          451 :          WRITE (UNIT=unit, FMT="(A,I0,A,A)") "fn=(", r_stat%routine_id, ") ", r_stat%routineN
     365          451 :          WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
     366          451 :             INT(T*r_stat%excl_walltime_accu, KIND=int_8), &
     367          903 :             INT(E*r_stat%excl_energy_accu, KIND=int_8)
     368              :       END DO
     369              : 
     370            1 :       ct_items => callgraph_items(timer_env%callgraph)
     371          790 :       DO i = 1, SIZE(ct_items)
     372          789 :          c_stat => ct_items(i)%value
     373          789 :          WRITE (UNIT=unit, FMT="(A,I0,A)") "fn=(", ct_items(i)%key(1), ")"
     374          789 :          WRITE (UNIT=unit, FMT="(A,I0,A)") "cfn=(", ct_items(i)%key(2), ")"
     375          789 :          WRITE (UNIT=unit, FMT="(A,I0,A)") "calls=", c_stat%total_calls, " 1"
     376          789 :          WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
     377          789 :             INT(T*c_stat%incl_walltime_accu, KIND=int_8), &
     378         1579 :             INT(E*c_stat%incl_energy_accu, KIND=int_8)
     379              :       END DO
     380            1 :       DEALLOCATE (ct_items)
     381              : 
     382            1 :       CALL close_file(unit_number=unit, file_status="KEEP")
     383              : 
     384            1 :    END SUBROUTINE timings_report_callgraph
     385              : END MODULE timings_report
     386              : 
        

Generated by: LCOV version 2.0-1