Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Timing routines for accounting
10 : !> \par History
11 : !> 02.2004 made a stacked version (of stacks...) [Joost VandeVondele]
12 : !> 11.2004 storable timer_envs (for f77 interface) [fawzi]
13 : !> 10.2005 binary search to speed up lookup in timeset [fawzi]
14 : !> 12.2012 Complete rewrite based on dictionaries. [ole]
15 : !> 01.2014 Collect statistics from all MPI ranks. [ole]
16 : !> \author JGH
17 : ! **************************************************************************************************
18 : MODULE timings_report
19 : USE callgraph, ONLY: callgraph_item_type,&
20 : callgraph_items
21 : USE cp_files, ONLY: close_file,&
22 : open_file
23 : USE kinds, ONLY: default_string_length,&
24 : dp,&
25 : int_8
26 : USE list, ONLY: list_destroy,&
27 : list_get,&
28 : list_init,&
29 : list_isready,&
30 : list_pop,&
31 : list_push,&
32 : list_size
33 : USE list_routinereport, ONLY: list_routinereport_type
34 : USE message_passing, ONLY: mp_para_env_type
35 : USE routine_map, ONLY: routine_map_get,&
36 : routine_map_haskey
37 : USE timings, ONLY: get_timer_env
38 : USE timings_base_type, ONLY: call_stat_type,&
39 : routine_report_type,&
40 : routine_stat_type
41 : USE timings_types, ONLY: timer_env_type
42 : USE util, ONLY: sort
43 : #include "../base/base_uses.f90"
44 :
45 : IMPLICIT NONE
46 : PRIVATE
47 :
48 : INTEGER, PUBLIC, PARAMETER :: cost_type_time = 17, cost_type_energy = 18
49 :
50 : PUBLIC :: timings_report_print, timings_report_callgraph
51 :
52 : CONTAINS
53 :
54 : ! **************************************************************************************************
55 : !> \brief Print accumulated information on timers
56 : !> \param iw ...
57 : !> \param r_timings ...
58 : !> \param sort_by_self_time ...
59 : !> \param cost_type ...
60 : !> \param report_maxloc ...
61 : !> \param para_env is needed to collect statistics from other nodes.
62 : !> \par History
63 : !> none
64 : !> \author JGH
65 : ! **************************************************************************************************
66 9755 : SUBROUTINE timings_report_print(iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
67 : INTEGER, INTENT(IN) :: iw
68 : REAL(KIND=dp), INTENT(IN) :: r_timings
69 : LOGICAL, INTENT(IN) :: sort_by_self_time
70 : INTEGER, INTENT(IN) :: cost_type
71 : LOGICAL, INTENT(IN) :: report_maxloc
72 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
73 :
74 : TYPE(list_routinereport_type) :: reports
75 : TYPE(routine_report_type), POINTER :: r_report
76 :
77 9755 : CALL list_init(reports)
78 9755 : CALL collect_reports_from_ranks(reports, cost_type, para_env)
79 :
80 9755 : IF (list_size(reports) > 0 .AND. iw > 0) &
81 4981 : CALL print_reports(reports, iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
82 :
83 : ! deallocate reports
84 3898670 : DO WHILE (list_size(reports) > 0)
85 3888915 : r_report => list_pop(reports)
86 3888915 : DEALLOCATE (r_report)
87 : END DO
88 9755 : CALL list_destroy(reports)
89 :
90 9755 : END SUBROUTINE timings_report_print
91 :
92 : ! **************************************************************************************************
93 : !> \brief Collects the timing or energy reports from all MPI ranks.
94 : !> \param reports ...
95 : !> \param cost_type ...
96 : !> \param para_env ...
97 : !> \author Ole Schuett
98 : ! **************************************************************************************************
99 9755 : SUBROUTINE collect_reports_from_ranks(reports, cost_type, para_env)
100 : TYPE(list_routinereport_type), INTENT(INOUT) :: reports
101 : INTEGER, INTENT(IN) :: cost_type
102 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
103 :
104 : CHARACTER(LEN=default_string_length) :: routineN
105 : INTEGER :: local_routine_id, sending_rank
106 9755 : INTEGER, ALLOCATABLE, DIMENSION(:) :: collected
107 : REAL(KIND=dp) :: foobar
108 : REAL(KIND=dp), DIMENSION(2) :: dbuf
109 : TYPE(routine_report_type), POINTER :: r_report
110 : TYPE(routine_stat_type), POINTER :: r_stat
111 : TYPE(timer_env_type), POINTER :: timer_env
112 :
113 9755 : NULLIFY (r_stat, r_report, timer_env)
114 9755 : IF (.NOT. list_isready(reports)) &
115 0 : CPABORT("BUG")
116 :
117 9755 : timer_env => get_timer_env()
118 :
119 : ! make sure all functions have been called so that list_size(timer_env%routine_stats)
120 : ! and the actual dictionary are consistent in the loop below, preventing out of bounds.
121 : ! this hack makes sure they are called before
122 9755 : routineN = ""
123 9755 : CALL para_env%bcast(routineN, 0)
124 9755 : sending_rank = 0
125 9755 : CALL para_env%max(sending_rank)
126 9755 : CALL para_env%sum(sending_rank)
127 9755 : foobar = 0.0_dp
128 9755 : CALL para_env%max(foobar)
129 9755 : dbuf = 0.0_dp
130 9755 : CALL para_env%maxloc(dbuf)
131 9755 : CALL para_env%sum(foobar)
132 : ! end hack
133 :
134 : ! Array collected is used as a bit field.
135 : ! It's of type integer in order to use the convenient MINLOC routine.
136 29265 : ALLOCATE (collected(list_size(timer_env%routine_stats)))
137 3884265 : collected(:) = 0
138 :
139 3888915 : DO
140 : ! does any rank have uncollected stats?
141 3898670 : sending_rank = -1
142 794023110 : IF (.NOT. ALL(collected == 1)) sending_rank = para_env%mepos
143 3898670 : CALL para_env%max(sending_rank)
144 3898670 : IF (sending_rank < 0) EXIT ! every rank got all routines collected
145 3888915 : IF (sending_rank == para_env%mepos) THEN
146 942237602 : local_routine_id = MINLOC(collected, dim=1)
147 1967432 : r_stat => list_get(timer_env%routine_stats, local_routine_id)
148 1967432 : routineN = r_stat%routineN
149 : END IF
150 3888915 : CALL para_env%bcast(routineN, sending_rank)
151 :
152 : ! Create new report for routineN
153 3888915 : ALLOCATE (r_report)
154 3888915 : CALL list_push(reports, r_report)
155 3888915 : r_report%routineN = routineN
156 :
157 : ! If routineN was called on local node, add local stats
158 3888915 : IF (routine_map_haskey(timer_env%routine_names, routineN)) THEN
159 3874510 : local_routine_id = routine_map_get(timer_env%routine_names, routineN)
160 3874510 : collected(local_routine_id) = 1
161 3874510 : r_stat => list_get(timer_env%routine_stats, local_routine_id)
162 3874510 : r_report%max_total_calls = r_stat%total_calls
163 3874510 : r_report%sum_total_calls = r_stat%total_calls
164 3874510 : r_report%sum_stackdepth = r_stat%stackdepth_accu
165 3874510 : SELECT CASE (cost_type)
166 : CASE (cost_type_energy)
167 0 : r_report%max_icost = r_stat%incl_energy_accu
168 0 : r_report%sum_icost = r_stat%incl_energy_accu
169 0 : r_report%max_ecost = r_stat%excl_energy_accu
170 0 : r_report%sum_ecost = r_stat%excl_energy_accu
171 : CASE (cost_type_time)
172 3874510 : r_report%max_icost = r_stat%incl_walltime_accu
173 3874510 : r_report%sum_icost = r_stat%incl_walltime_accu
174 3874510 : r_report%max_ecost = r_stat%excl_walltime_accu
175 3874510 : r_report%sum_ecost = r_stat%excl_walltime_accu
176 : CASE DEFAULT
177 3874510 : CPABORT("BUG")
178 : END SELECT
179 : END IF
180 :
181 : ! collect stats of routineN via MPI
182 3888915 : CALL para_env%max(r_report%max_total_calls)
183 3888915 : CALL para_env%sum(r_report%sum_total_calls)
184 3888915 : CALL para_env%sum(r_report%sum_stackdepth)
185 :
186 : ! get value and rank of the maximum inclusive cost
187 11666745 : dbuf = (/r_report%max_icost, REAL(para_env%mepos, KIND=dp)/)
188 3888915 : CALL para_env%maxloc(dbuf)
189 3888915 : r_report%max_icost = dbuf(1)
190 3888915 : r_report%max_irank = INT(dbuf(2))
191 :
192 3888915 : CALL para_env%sum(r_report%sum_icost)
193 :
194 : ! get value and rank of the maximum exclusive cost
195 11666745 : dbuf = (/r_report%max_ecost, REAL(para_env%mepos, KIND=dp)/)
196 3888915 : CALL para_env%maxloc(dbuf)
197 3888915 : r_report%max_ecost = dbuf(1)
198 3888915 : r_report%max_erank = INT(dbuf(2))
199 :
200 3898670 : CALL para_env%sum(r_report%sum_ecost)
201 : END DO
202 :
203 9755 : END SUBROUTINE collect_reports_from_ranks
204 :
205 : ! **************************************************************************************************
206 : !> \brief Print the collected reports
207 : !> \param reports ...
208 : !> \param iw ...
209 : !> \param threshold ...
210 : !> \param sort_by_exclusiv_cost ...
211 : !> \param cost_type ...
212 : !> \param report_maxloc ...
213 : !> \param para_env ...
214 : !> \par History
215 : !> 01.2014 Refactored (Ole Schuett)
216 : !> \author JGH
217 : ! **************************************************************************************************
218 4981 : SUBROUTINE print_reports(reports, iw, threshold, sort_by_exclusiv_cost, cost_type, report_maxloc, para_env)
219 : TYPE(list_routinereport_type), INTENT(IN) :: reports
220 : INTEGER, INTENT(IN) :: iw
221 : REAL(KIND=dp), INTENT(IN) :: threshold
222 : LOGICAL, INTENT(IN) :: sort_by_exclusiv_cost
223 : INTEGER, INTENT(IN) :: cost_type
224 : LOGICAL, INTENT(IN) :: report_maxloc
225 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
226 :
227 : CHARACTER(LEN=4) :: label
228 : CHARACTER(LEN=default_string_length) :: fmt, title
229 : INTEGER :: decimals, i, j, num_routines
230 4981 : INTEGER, ALLOCATABLE, DIMENSION(:) :: indices
231 : REAL(KIND=dp) :: asd, maxcost, mincost
232 4981 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: max_costs
233 : TYPE(routine_report_type), POINTER :: r_report_i, r_report_j
234 :
235 4981 : NULLIFY (r_report_i, r_report_j)
236 4981 : IF (.NOT. list_isready(reports)) &
237 0 : CPABORT("BUG")
238 :
239 : ! are we printing timing or energy ?
240 4981 : SELECT CASE (cost_type)
241 : CASE (cost_type_energy)
242 0 : title = "E N E R G Y"
243 0 : label = "ENER"
244 : CASE (cost_type_time)
245 4981 : title = "T I M I N G"
246 4981 : label = "TIME"
247 : CASE DEFAULT
248 4981 : CPABORT("BUG")
249 : END SELECT
250 :
251 : ! write banner
252 4981 : WRITE (UNIT=iw, FMT="(/,T2,A)") REPEAT("-", 79)
253 4981 : WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
254 4981 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T80,A)") "-", TRIM(title), "-"
255 4981 : WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
256 4981 : WRITE (UNIT=iw, FMT="(T2,A)") REPEAT("-", 79)
257 4981 : IF (report_maxloc) THEN
258 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18,A8)") &
259 0 : "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label, "MAXRANK"
260 : ELSE
261 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18)") &
262 4981 : "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label
263 : END IF
264 :
265 : WRITE (UNIT=iw, FMT="(T33,A)") &
266 4981 : "MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM"
267 :
268 : ! sort statistics
269 4981 : num_routines = list_size(reports)
270 14943 : ALLOCATE (max_costs(num_routines))
271 1972413 : DO i = 1, num_routines
272 1967432 : r_report_i => list_get(reports, i)
273 1972413 : IF (sort_by_exclusiv_cost) THEN
274 1442 : max_costs(i) = r_report_i%max_ecost
275 : ELSE
276 1965990 : max_costs(i) = r_report_i%max_icost
277 : END IF
278 : END DO
279 14943 : ALLOCATE (indices(num_routines))
280 4981 : CALL sort(max_costs, num_routines, indices)
281 :
282 1977394 : maxcost = MAXVAL(max_costs)
283 4981 : mincost = maxcost*threshold
284 :
285 : ! adjust fmt dynamically based on the max walltime.
286 : ! few clocks have more than 3 digits resolution, so stop there
287 4981 : decimals = 3
288 4981 : IF (maxcost >= 10000) decimals = 2
289 0 : IF (maxcost >= 100000) decimals = 1
290 4981 : IF (maxcost >= 1000000) decimals = 0
291 4981 : IF (report_maxloc) THEN
292 : WRITE (UNIT=fmt, FMT="(A,I0,A)") &
293 0 : "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "),I8)"
294 : ELSE
295 : WRITE (UNIT=fmt, FMT="(A,I0,A)") &
296 4981 : "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "))"
297 : END IF
298 :
299 : !write output
300 1972413 : DO i = num_routines, 1, -1
301 1972413 : IF (max_costs(i) >= mincost) THEN
302 262012 : j = indices(i)
303 262012 : r_report_j => list_get(reports, j)
304 : ! average stack depth
305 : asd = REAL(r_report_j%sum_stackdepth, KIND=dp)/ &
306 262012 : REAL(MAX(1_int_8, r_report_j%sum_total_calls), KIND=dp)
307 262012 : IF (report_maxloc) THEN
308 : WRITE (UNIT=iw, FMT=fmt) &
309 0 : ADJUSTL(r_report_j%routineN(1:31)), &
310 0 : r_report_j%max_total_calls, &
311 0 : asd, &
312 0 : r_report_j%sum_ecost/para_env%num_pe, &
313 0 : r_report_j%max_ecost, &
314 0 : r_report_j%sum_icost/para_env%num_pe, &
315 0 : r_report_j%max_icost, &
316 0 : r_report_j%max_erank
317 : ELSE
318 : WRITE (UNIT=iw, FMT=fmt) &
319 262012 : ADJUSTL(r_report_j%routineN(1:31)), &
320 262012 : r_report_j%max_total_calls, &
321 262012 : asd, &
322 262012 : r_report_j%sum_ecost/para_env%num_pe, &
323 262012 : r_report_j%max_ecost, &
324 262012 : r_report_j%sum_icost/para_env%num_pe, &
325 524024 : r_report_j%max_icost
326 : END IF
327 : END IF
328 : END DO
329 4981 : WRITE (UNIT=iw, FMT="(T2,A,/)") REPEAT("-", 79)
330 :
331 4981 : END SUBROUTINE print_reports
332 :
333 : ! **************************************************************************************************
334 : !> \brief Write accumulated callgraph information as cachegrind-file.
335 : !> http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
336 : !> \param filename ...
337 : !> \par History
338 : !> 12.2012 initial version[ole]
339 : !> \author Ole Schuett
340 : ! **************************************************************************************************
341 1 : SUBROUTINE timings_report_callgraph(filename)
342 : CHARACTER(len=*), INTENT(in) :: filename
343 :
344 : INTEGER, PARAMETER :: E = 1000, T = 100000
345 :
346 : INTEGER :: i, unit
347 : TYPE(call_stat_type), POINTER :: c_stat
348 1 : TYPE(callgraph_item_type), DIMENSION(:), POINTER :: ct_items
349 : TYPE(routine_stat_type), POINTER :: r_stat
350 : TYPE(timer_env_type), POINTER :: timer_env
351 :
352 : CALL open_file(file_name=filename, file_status="REPLACE", file_action="WRITE", &
353 1 : file_form="FORMATTED", unit_number=unit)
354 1 : timer_env => get_timer_env()
355 :
356 : ! use outermost routine as total runtime
357 1 : r_stat => list_get(timer_env%routine_stats, 1)
358 1 : WRITE (UNIT=unit, FMT="(A)") "events: Walltime Energy"
359 1 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "summary: ", &
360 1 : INT(T*r_stat%incl_walltime_accu, KIND=int_8), &
361 2 : INT(E*r_stat%incl_energy_accu, KIND=int_8)
362 :
363 453 : DO i = 1, list_size(timer_env%routine_stats)
364 452 : r_stat => list_get(timer_env%routine_stats, i)
365 452 : WRITE (UNIT=unit, FMT="(A,I0,A,A)") "fn=(", r_stat%routine_id, ") ", r_stat%routineN
366 452 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
367 452 : INT(T*r_stat%excl_walltime_accu, KIND=int_8), &
368 905 : INT(E*r_stat%excl_energy_accu, KIND=int_8)
369 : END DO
370 :
371 1 : ct_items => callgraph_items(timer_env%callgraph)
372 792 : DO i = 1, SIZE(ct_items)
373 791 : c_stat => ct_items(i)%value
374 791 : WRITE (UNIT=unit, FMT="(A,I0,A)") "fn=(", ct_items(i)%key(1), ")"
375 791 : WRITE (UNIT=unit, FMT="(A,I0,A)") "cfn=(", ct_items(i)%key(2), ")"
376 791 : WRITE (UNIT=unit, FMT="(A,I0,A)") "calls=", c_stat%total_calls, " 1"
377 791 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
378 791 : INT(T*c_stat%incl_walltime_accu, KIND=int_8), &
379 1583 : INT(E*c_stat%incl_energy_accu, KIND=int_8)
380 : END DO
381 1 : DEALLOCATE (ct_items)
382 :
383 1 : CALL close_file(unit_number=unit, file_status="KEEP")
384 :
385 1 : END SUBROUTINE timings_report_callgraph
386 : END MODULE timings_report
387 :
|