Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 : #include "dbm_library.h"
8 : #include "../mpiwrap/cp_mpi.h"
9 : #include "../offload/offload_library.h"
10 : #include "../offload/offload_mempool.h"
11 :
12 : #include <assert.h>
13 : #include <inttypes.h>
14 : #include <omp.h>
15 : #include <stdio.h>
16 : #include <stdlib.h>
17 : #include <string.h>
18 :
19 : #define DBM_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT) \
20 : ((FN)(MSG, (int)strlen(MSG), OUTPUT_UNIT))
21 : #define DBM_NUM_COUNTERS 64
22 :
23 : static int64_t **per_thread_counters = NULL;
24 : static bool library_initialized = false;
25 : static int max_threads = 0;
26 :
27 : #if !defined(_OPENMP)
28 : #error "OpenMP is required. Please add -fopenmp to your C compiler flags."
29 : #endif
30 :
31 : /*******************************************************************************
32 : * \brief Initializes the DBM library.
33 : * \author Ole Schuett
34 : ******************************************************************************/
35 10328 : void dbm_library_init(void) {
36 10328 : assert(omp_get_num_threads() == 1);
37 :
38 10328 : if (library_initialized) {
39 0 : fprintf(stderr, "DBM library was already initialized.\n");
40 0 : abort();
41 : }
42 :
43 10328 : offload_init();
44 :
45 10328 : max_threads = omp_get_max_threads();
46 10328 : per_thread_counters = malloc(max_threads * sizeof(int64_t *));
47 10328 : assert(per_thread_counters != NULL);
48 :
49 : // Using parallel regions to ensure memory is allocated near a thread's core.
50 : #pragma omp parallel default(none) shared(per_thread_counters) \
51 : num_threads(max_threads)
52 : {
53 : const int ithread = omp_get_thread_num();
54 : const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t);
55 : per_thread_counters[ithread] = malloc(counters_size);
56 : assert(per_thread_counters[ithread] != NULL);
57 : memset(per_thread_counters[ithread], 0, counters_size);
58 : }
59 :
60 10328 : library_initialized = true;
61 10328 : }
62 :
63 : /*******************************************************************************
64 : * \brief Finalizes the DBM library.
65 : * \author Ole Schuett
66 : ******************************************************************************/
67 10328 : void dbm_library_finalize(void) {
68 10328 : assert(omp_get_num_threads() == 1);
69 :
70 10328 : if (!library_initialized) {
71 0 : fprintf(stderr, "Error: DBM library is not initialized.\n");
72 0 : abort();
73 : }
74 :
75 20656 : for (int i = 0; i < max_threads; i++) {
76 10328 : free(per_thread_counters[i]);
77 : }
78 10328 : free(per_thread_counters);
79 10328 : per_thread_counters = NULL;
80 :
81 10328 : offload_mempool_clear();
82 10328 : library_initialized = false;
83 10328 : }
84 :
85 : /*******************************************************************************
86 : * \brief Computes min(3, floor(log10(x))).
87 : * \author Ole Schuett
88 : ******************************************************************************/
89 87063477 : static int floorlog10(const int x) {
90 87063477 : return (100 <= x ? (1000 <= x ? 3 : 2) : (10 <= x ? 1 : 0));
91 : }
92 :
93 : /*******************************************************************************
94 : * \brief Add given block multiplication to stats. This routine is thread-safe.
95 : * \author Ole Schuett
96 : ******************************************************************************/
97 29021159 : void dbm_library_counter_increment(const int m, const int n, const int k) {
98 29021159 : const int ithread = omp_get_thread_num();
99 29021159 : assert(ithread < max_threads);
100 29021159 : const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k);
101 29021159 : per_thread_counters[ithread][idx]++;
102 29021159 : }
103 :
104 : /*******************************************************************************
105 : * \brief Comperator passed to qsort to compare two counters.
106 : * \author Ole Schuett
107 : ******************************************************************************/
108 2008588 : static int compare_counters(const void *a, const void *b) {
109 2008588 : return *(const int64_t *)b - *(const int64_t *)a;
110 : }
111 :
112 : /*******************************************************************************
113 : * \brief Prints statistics gathered by the DBM library.
114 : * \author Ole Schuett
115 : ******************************************************************************/
116 10446 : void dbm_library_print_stats(const int fortran_comm,
117 : void (*print_func)(const char *, int, int),
118 : const int output_unit) {
119 10446 : assert(omp_get_num_threads() == 1);
120 :
121 10446 : if (!library_initialized) {
122 0 : fprintf(stderr, "Error: DBM library is not initialized.\n");
123 0 : abort();
124 : }
125 :
126 10446 : const cp_mpi_comm_t comm = cp_mpi_comm_f2c(fortran_comm);
127 : // Sum all counters across threads and mpi ranks.
128 10446 : int64_t counters[DBM_NUM_COUNTERS][2] = {{0}};
129 10446 : double total = 0.0;
130 678990 : for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
131 668544 : counters[i][1] = i; // needed as inverse index after qsort
132 1337088 : for (int j = 0; j < max_threads; j++) {
133 668544 : counters[i][0] += per_thread_counters[j][i];
134 : }
135 668544 : cp_mpi_sum_int64(&counters[i][0], 1, comm);
136 668544 : total += counters[i][0];
137 : }
138 :
139 : // Sort counters.
140 10446 : qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters);
141 :
142 : // Determine if anything needs to be printed.
143 10446 : bool print = false;
144 655680 : for (int i = 0; i < DBM_NUM_COUNTERS && !print; i++) {
145 645234 : if (counters[i][0] != 0) {
146 370 : print = true;
147 : }
148 : }
149 10446 : if (!print) {
150 10076 : return; // nothing to be printed
151 : }
152 :
153 : // Print counters.
154 370 : DBM_LIBRARY_PRINT(print_func, "\n", output_unit);
155 370 : DBM_LIBRARY_PRINT(
156 : print_func,
157 : " ----------------------------------------------------------------"
158 : "---------------\n",
159 : output_unit);
160 370 : DBM_LIBRARY_PRINT(
161 : print_func,
162 : " - "
163 : " -\n",
164 : output_unit);
165 370 : DBM_LIBRARY_PRINT(
166 : print_func,
167 : " - DBM STATISTICS "
168 : " -\n",
169 : output_unit);
170 370 : DBM_LIBRARY_PRINT(
171 : print_func,
172 : " - "
173 : " -\n",
174 : output_unit);
175 370 : DBM_LIBRARY_PRINT(
176 : print_func,
177 : " ----------------------------------------------------------------"
178 : "---------------\n",
179 : output_unit);
180 370 : DBM_LIBRARY_PRINT(
181 : print_func,
182 : " M x N x K "
183 : "COUNT PERCENT\n",
184 : output_unit);
185 :
186 370 : const char *labels[] = {"?", "??", "???", ">999"};
187 370 : char buffer[100];
188 24050 : for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
189 23680 : if (counters[i][0] == 0) {
190 20764 : continue; // skip empty counters
191 : }
192 2916 : const double percent = 100.0 * counters[i][0] / total;
193 2916 : const int idx = counters[i][1];
194 2916 : const int m = (idx % 64) / 16;
195 2916 : const int n = (idx % 16) / 4;
196 2916 : const int k = (idx % 4) / 1;
197 2916 : snprintf(buffer, sizeof(buffer),
198 : " %4s x %4s x %4s %46" PRId64 " %10.2f%%\n", labels[m],
199 : labels[n], labels[k], counters[i][0], percent);
200 2916 : DBM_LIBRARY_PRINT(print_func, buffer, output_unit);
201 : }
202 :
203 370 : DBM_LIBRARY_PRINT(
204 : print_func,
205 : " ----------------------------------------------------------------"
206 : "---------------\n",
207 : output_unit);
208 : }
209 :
210 : // EOF
|