Line data Source code
1 : !--------------------------------------------------------------------------------------------------!
2 : ! CP2K: A general program to perform molecular dynamics simulations !
3 : ! Copyright 2000-2025 CP2K developers group <https://cp2k.org> !
4 : ! !
5 : ! SPDX-License-Identifier: GPL-2.0-or-later !
6 : !--------------------------------------------------------------------------------------------------!
7 :
8 : ! **************************************************************************************************
9 : !> \brief Timing routines for accounting
10 : !> \par History
11 : !> 02.2004 made a stacked version (of stacks...) [Joost VandeVondele]
12 : !> 11.2004 storable timer_envs (for f77 interface) [fawzi]
13 : !> 10.2005 binary search to speed up lookup in timeset [fawzi]
14 : !> 12.2012 Complete rewrite based on dictionaries. [ole]
15 : !> 01.2014 Collect statistics from all MPI ranks. [ole]
16 : !> \author JGH
17 : ! **************************************************************************************************
18 : MODULE timings_report
19 : USE callgraph, ONLY: callgraph_item_type,&
20 : callgraph_items
21 : USE cp_files, ONLY: close_file,&
22 : open_file
23 : USE kinds, ONLY: default_string_length,&
24 : dp,&
25 : int_8
26 : USE list, ONLY: list_destroy,&
27 : list_get,&
28 : list_init,&
29 : list_isready,&
30 : list_pop,&
31 : list_push,&
32 : list_size
33 : USE list_routinereport, ONLY: list_routinereport_type
34 : USE message_passing, ONLY: mp_para_env_type
35 : USE routine_map, ONLY: routine_map_get,&
36 : routine_map_haskey
37 : USE timings, ONLY: get_timer_env
38 : USE timings_base_type, ONLY: call_stat_type,&
39 : routine_report_type,&
40 : routine_stat_type
41 : USE timings_types, ONLY: timer_env_type
42 : USE util, ONLY: sort
43 : #include "../base/base_uses.f90"
44 :
45 : IMPLICIT NONE
46 : PRIVATE
47 :
48 : INTEGER, PUBLIC, PARAMETER :: cost_type_time = 17, cost_type_energy = 18
49 :
50 : PUBLIC :: timings_report_print, timings_report_callgraph
51 :
52 : CONTAINS
53 :
54 : ! **************************************************************************************************
55 : !> \brief Print accumulated information on timers
56 : !> \param iw ...
57 : !> \param r_timings ...
58 : !> \param sort_by_self_time ...
59 : !> \param cost_type ...
60 : !> \param report_maxloc ...
61 : !> \param para_env is needed to collect statistics from other nodes.
62 : !> \par History
63 : !> none
64 : !> \author JGH
65 : ! **************************************************************************************************
66 9881 : SUBROUTINE timings_report_print(iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
67 : INTEGER, INTENT(IN) :: iw
68 : REAL(KIND=dp), INTENT(IN) :: r_timings
69 : LOGICAL, INTENT(IN) :: sort_by_self_time
70 : INTEGER, INTENT(IN) :: cost_type
71 : LOGICAL, INTENT(IN) :: report_maxloc
72 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
73 :
74 : TYPE(list_routinereport_type) :: reports
75 : TYPE(routine_report_type), POINTER :: r_report
76 :
77 9881 : CALL list_init(reports)
78 9881 : CALL collect_reports_from_ranks(reports, cost_type, para_env)
79 :
80 9881 : IF (list_size(reports) > 0 .AND. iw > 0) &
81 5044 : CALL print_reports(reports, iw, r_timings, sort_by_self_time, cost_type, report_maxloc, para_env)
82 :
83 : ! deallocate reports
84 3968324 : DO WHILE (list_size(reports) > 0)
85 3958443 : r_report => list_pop(reports)
86 3958443 : DEALLOCATE (r_report)
87 : END DO
88 9881 : CALL list_destroy(reports)
89 :
90 9881 : END SUBROUTINE timings_report_print
91 :
92 : ! **************************************************************************************************
93 : !> \brief Collects the timing or energy reports from all MPI ranks.
94 : !> \param reports ...
95 : !> \param cost_type ...
96 : !> \param para_env ...
97 : !> \author Ole Schuett
98 : ! **************************************************************************************************
99 9881 : SUBROUTINE collect_reports_from_ranks(reports, cost_type, para_env)
100 : TYPE(list_routinereport_type), INTENT(INOUT) :: reports
101 : INTEGER, INTENT(IN) :: cost_type
102 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
103 :
104 : CHARACTER(LEN=default_string_length) :: routineN
105 : INTEGER :: local_routine_id, sending_rank
106 9881 : INTEGER, ALLOCATABLE, DIMENSION(:) :: collected
107 : REAL(KIND=dp) :: foobar
108 : REAL(KIND=dp), DIMENSION(2) :: dbuf
109 : TYPE(routine_report_type), POINTER :: r_report
110 : TYPE(routine_stat_type), POINTER :: r_stat
111 : TYPE(timer_env_type), POINTER :: timer_env
112 :
113 9881 : NULLIFY (r_stat, r_report, timer_env)
114 9881 : IF (.NOT. list_isready(reports)) &
115 0 : CPABORT("BUG")
116 :
117 9881 : timer_env => get_timer_env()
118 :
119 : ! make sure all functions have been called so that list_size(timer_env%routine_stats)
120 : ! and the actual dictionary are consistent in the loop below, preventing out of bounds.
121 : ! this hack makes sure they are called before
122 9881 : routineN = ""
123 9881 : CALL para_env%bcast(routineN, 0)
124 9881 : sending_rank = 0
125 9881 : CALL para_env%max(sending_rank)
126 9881 : CALL para_env%sum(sending_rank)
127 9881 : foobar = 0.0_dp
128 9881 : CALL para_env%max(foobar)
129 9881 : dbuf = 0.0_dp
130 9881 : CALL para_env%maxloc(dbuf)
131 9881 : CALL para_env%sum(foobar)
132 : ! end hack
133 :
134 : ! Array collected is used as a bit field.
135 : ! It's of type integer in order to use the convenient MINLOC routine.
136 3973292 : ALLOCATE (collected(list_size(timer_env%routine_stats)), SOURCE=0)
137 :
138 3958443 : DO
139 : ! does any rank have uncollected stats?
140 3968324 : sending_rank = -1
141 812465790 : IF (.NOT. ALL(collected == 1)) sending_rank = para_env%mepos
142 3968324 : CALL para_env%max(sending_rank)
143 3968324 : IF (sending_rank < 0) EXIT ! every rank got all routines collected
144 3958443 : IF (sending_rank == para_env%mepos) THEN
145 963399605 : local_routine_id = MINLOC(collected, dim=1)
146 2002232 : r_stat => list_get(timer_env%routine_stats, local_routine_id)
147 2002232 : routineN = r_stat%routineN
148 : END IF
149 3958443 : CALL para_env%bcast(routineN, sending_rank)
150 :
151 : ! Create new report for routineN
152 3958443 : ALLOCATE (r_report)
153 3958443 : CALL list_push(reports, r_report)
154 3958443 : r_report%routineN = routineN
155 :
156 : ! If routineN was called on local node, add local stats
157 3958443 : IF (routine_map_haskey(timer_env%routine_names, routineN)) THEN
158 3943649 : local_routine_id = routine_map_get(timer_env%routine_names, routineN)
159 3943649 : collected(local_routine_id) = 1
160 3943649 : r_stat => list_get(timer_env%routine_stats, local_routine_id)
161 3943649 : r_report%max_total_calls = r_stat%total_calls
162 3943649 : r_report%sum_total_calls = r_stat%total_calls
163 3943649 : r_report%sum_stackdepth = r_stat%stackdepth_accu
164 3943649 : SELECT CASE (cost_type)
165 : CASE (cost_type_energy)
166 0 : r_report%max_icost = r_stat%incl_energy_accu
167 0 : r_report%sum_icost = r_stat%incl_energy_accu
168 0 : r_report%max_ecost = r_stat%excl_energy_accu
169 0 : r_report%sum_ecost = r_stat%excl_energy_accu
170 : CASE (cost_type_time)
171 3943649 : r_report%max_icost = r_stat%incl_walltime_accu
172 3943649 : r_report%sum_icost = r_stat%incl_walltime_accu
173 3943649 : r_report%max_ecost = r_stat%excl_walltime_accu
174 3943649 : r_report%sum_ecost = r_stat%excl_walltime_accu
175 : CASE DEFAULT
176 3943649 : CPABORT("BUG")
177 : END SELECT
178 : END IF
179 :
180 : ! collect stats of routineN via MPI
181 3958443 : CALL para_env%max(r_report%max_total_calls)
182 3958443 : CALL para_env%sum(r_report%sum_total_calls)
183 3958443 : CALL para_env%sum(r_report%sum_stackdepth)
184 :
185 : ! get value and rank of the maximum inclusive cost
186 11875329 : dbuf = [r_report%max_icost, REAL(para_env%mepos, KIND=dp)]
187 3958443 : CALL para_env%maxloc(dbuf)
188 3958443 : r_report%max_icost = dbuf(1)
189 3958443 : r_report%max_irank = INT(dbuf(2))
190 :
191 3958443 : CALL para_env%sum(r_report%sum_icost)
192 :
193 : ! get value and rank of the maximum exclusive cost
194 11875329 : dbuf = [r_report%max_ecost, REAL(para_env%mepos, KIND=dp)]
195 3958443 : CALL para_env%maxloc(dbuf)
196 3958443 : r_report%max_ecost = dbuf(1)
197 3958443 : r_report%max_erank = INT(dbuf(2))
198 :
199 3968324 : CALL para_env%sum(r_report%sum_ecost)
200 : END DO
201 :
202 9881 : END SUBROUTINE collect_reports_from_ranks
203 :
204 : ! **************************************************************************************************
205 : !> \brief Print the collected reports
206 : !> \param reports ...
207 : !> \param iw ...
208 : !> \param threshold ...
209 : !> \param sort_by_exclusiv_cost ...
210 : !> \param cost_type ...
211 : !> \param report_maxloc ...
212 : !> \param para_env ...
213 : !> \par History
214 : !> 01.2014 Refactored (Ole Schuett)
215 : !> \author JGH
216 : ! **************************************************************************************************
217 5044 : SUBROUTINE print_reports(reports, iw, threshold, sort_by_exclusiv_cost, cost_type, report_maxloc, para_env)
218 : TYPE(list_routinereport_type), INTENT(IN) :: reports
219 : INTEGER, INTENT(IN) :: iw
220 : REAL(KIND=dp), INTENT(IN) :: threshold
221 : LOGICAL, INTENT(IN) :: sort_by_exclusiv_cost
222 : INTEGER, INTENT(IN) :: cost_type
223 : LOGICAL, INTENT(IN) :: report_maxloc
224 : TYPE(mp_para_env_type), INTENT(IN) :: para_env
225 :
226 : CHARACTER(LEN=4) :: label
227 : CHARACTER(LEN=default_string_length) :: fmt, title
228 : INTEGER :: decimals, i, j, num_routines
229 5044 : INTEGER, ALLOCATABLE, DIMENSION(:) :: indices
230 : REAL(KIND=dp) :: asd, maxcost, mincost
231 5044 : REAL(KIND=dp), ALLOCATABLE, DIMENSION(:) :: max_costs
232 : TYPE(routine_report_type), POINTER :: r_report_i, r_report_j
233 :
234 5044 : NULLIFY (r_report_i, r_report_j)
235 5044 : IF (.NOT. list_isready(reports)) &
236 0 : CPABORT("BUG")
237 :
238 : ! are we printing timing or energy ?
239 5044 : SELECT CASE (cost_type)
240 : CASE (cost_type_energy)
241 0 : title = "E N E R G Y"
242 0 : label = "ENER"
243 : CASE (cost_type_time)
244 5044 : title = "T I M I N G"
245 5044 : label = "TIME"
246 : CASE DEFAULT
247 5044 : CPABORT("BUG")
248 : END SELECT
249 :
250 : ! write banner
251 5044 : WRITE (UNIT=iw, FMT="(/,T2,A)") REPEAT("-", 79)
252 5044 : WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
253 5044 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T80,A)") "-", TRIM(title), "-"
254 5044 : WRITE (UNIT=iw, FMT="(T2,A,T80,A)") "-", "-"
255 5044 : WRITE (UNIT=iw, FMT="(T2,A)") REPEAT("-", 79)
256 5044 : IF (report_maxloc) THEN
257 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18,A8)") &
258 0 : "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label, "MAXRANK"
259 : ELSE
260 : WRITE (UNIT=iw, FMT="(T2,A,T35,A,T41,A,T45,2A18)") &
261 5044 : "SUBROUTINE", "CALLS", " ASD", "SELF "//label, "TOTAL "//label
262 : END IF
263 :
264 : WRITE (UNIT=iw, FMT="(T33,A)") &
265 5044 : "MAXIMUM AVERAGE MAXIMUM AVERAGE MAXIMUM"
266 :
267 : ! sort statistics
268 5044 : num_routines = list_size(reports)
269 15132 : ALLOCATE (max_costs(num_routines))
270 2007276 : DO i = 1, num_routines
271 2002232 : r_report_i => list_get(reports, i)
272 2007276 : IF (sort_by_exclusiv_cost) THEN
273 1436 : max_costs(i) = r_report_i%max_ecost
274 : ELSE
275 2000796 : max_costs(i) = r_report_i%max_icost
276 : END IF
277 : END DO
278 15132 : ALLOCATE (indices(num_routines))
279 5044 : CALL sort(max_costs, num_routines, indices)
280 :
281 2012320 : maxcost = MAXVAL(max_costs)
282 5044 : mincost = maxcost*threshold
283 :
284 : ! adjust fmt dynamically based on the max walltime.
285 : ! few clocks have more than 3 digits resolution, so stop there
286 5044 : decimals = 3
287 5044 : IF (maxcost >= 10000) decimals = 2
288 0 : IF (maxcost >= 100000) decimals = 1
289 5044 : IF (maxcost >= 1000000) decimals = 0
290 5044 : IF (report_maxloc) THEN
291 : WRITE (UNIT=fmt, FMT="(A,I0,A)") &
292 0 : "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "),I8)"
293 : ELSE
294 : WRITE (UNIT=fmt, FMT="(A,I0,A)") &
295 5044 : "(T2,A30,1X,I7,1X,F4.1,4(1X,F8.", decimals, "))"
296 : END IF
297 :
298 : !write output
299 2007276 : DO i = num_routines, 1, -1
300 2007276 : IF (max_costs(i) >= mincost) THEN
301 264448 : j = indices(i)
302 264448 : r_report_j => list_get(reports, j)
303 : ! average stack depth
304 : asd = REAL(r_report_j%sum_stackdepth, KIND=dp)/ &
305 264448 : REAL(MAX(1_int_8, r_report_j%sum_total_calls), KIND=dp)
306 264448 : IF (report_maxloc) THEN
307 : WRITE (UNIT=iw, FMT=fmt) &
308 0 : ADJUSTL(r_report_j%routineN(1:31)), &
309 0 : r_report_j%max_total_calls, &
310 0 : asd, &
311 0 : r_report_j%sum_ecost/para_env%num_pe, &
312 0 : r_report_j%max_ecost, &
313 0 : r_report_j%sum_icost/para_env%num_pe, &
314 0 : r_report_j%max_icost, &
315 0 : r_report_j%max_erank
316 : ELSE
317 : WRITE (UNIT=iw, FMT=fmt) &
318 264448 : ADJUSTL(r_report_j%routineN(1:31)), &
319 264448 : r_report_j%max_total_calls, &
320 264448 : asd, &
321 264448 : r_report_j%sum_ecost/para_env%num_pe, &
322 264448 : r_report_j%max_ecost, &
323 264448 : r_report_j%sum_icost/para_env%num_pe, &
324 528896 : r_report_j%max_icost
325 : END IF
326 : END IF
327 : END DO
328 5044 : WRITE (UNIT=iw, FMT="(T2,A,/)") REPEAT("-", 79)
329 :
330 5044 : END SUBROUTINE print_reports
331 :
332 : ! **************************************************************************************************
333 : !> \brief Write accumulated callgraph information as cachegrind-file.
334 : !> http://kcachegrind.sourceforge.net/cgi-bin/show.cgi/KcacheGrindCalltreeFormat
335 : !> \param filename ...
336 : !> \par History
337 : !> 12.2012 initial version[ole]
338 : !> \author Ole Schuett
339 : ! **************************************************************************************************
340 1 : SUBROUTINE timings_report_callgraph(filename)
341 : CHARACTER(len=*), INTENT(in) :: filename
342 :
343 : INTEGER, PARAMETER :: E = 1000, T = 100000
344 :
345 : INTEGER :: i, unit
346 : TYPE(call_stat_type), POINTER :: c_stat
347 1 : TYPE(callgraph_item_type), DIMENSION(:), POINTER :: ct_items
348 : TYPE(routine_stat_type), POINTER :: r_stat
349 : TYPE(timer_env_type), POINTER :: timer_env
350 :
351 : CALL open_file(file_name=filename, file_status="REPLACE", file_action="WRITE", &
352 1 : file_form="FORMATTED", unit_number=unit)
353 1 : timer_env => get_timer_env()
354 :
355 : ! use outermost routine as total runtime
356 1 : r_stat => list_get(timer_env%routine_stats, 1)
357 1 : WRITE (UNIT=unit, FMT="(A)") "events: Walltime Energy"
358 1 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "summary: ", &
359 1 : INT(T*r_stat%incl_walltime_accu, KIND=int_8), &
360 2 : INT(E*r_stat%incl_energy_accu, KIND=int_8)
361 :
362 452 : DO i = 1, list_size(timer_env%routine_stats)
363 451 : r_stat => list_get(timer_env%routine_stats, i)
364 451 : WRITE (UNIT=unit, FMT="(A,I0,A,A)") "fn=(", r_stat%routine_id, ") ", r_stat%routineN
365 451 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
366 451 : INT(T*r_stat%excl_walltime_accu, KIND=int_8), &
367 903 : INT(E*r_stat%excl_energy_accu, KIND=int_8)
368 : END DO
369 :
370 1 : ct_items => callgraph_items(timer_env%callgraph)
371 790 : DO i = 1, SIZE(ct_items)
372 789 : c_stat => ct_items(i)%value
373 789 : WRITE (UNIT=unit, FMT="(A,I0,A)") "fn=(", ct_items(i)%key(1), ")"
374 789 : WRITE (UNIT=unit, FMT="(A,I0,A)") "cfn=(", ct_items(i)%key(2), ")"
375 789 : WRITE (UNIT=unit, FMT="(A,I0,A)") "calls=", c_stat%total_calls, " 1"
376 789 : WRITE (UNIT=unit, FMT="(A,I0,1X,I0)") "1 ", &
377 789 : INT(T*c_stat%incl_walltime_accu, KIND=int_8), &
378 1579 : INT(E*c_stat%incl_energy_accu, KIND=int_8)
379 : END DO
380 1 : DEALLOCATE (ct_items)
381 :
382 1 : CALL close_file(unit_number=unit, file_status="KEEP")
383 :
384 1 : END SUBROUTINE timings_report_callgraph
385 : END MODULE timings_report
386 :
|