Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 : #include "dbm_library.h"
8 : #include "../mpiwrap/cp_mpi.h"
9 : #include "../offload/offload_mempool.h"
10 :
11 : #include <assert.h>
12 : #include <inttypes.h>
13 : #include <omp.h>
14 : #include <stdio.h>
15 : #include <stdlib.h>
16 : #include <string.h>
17 :
18 : #define DBM_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT) \
19 : ((FN)(MSG, (int)strlen(MSG), OUTPUT_UNIT))
20 : #define DBM_NUM_COUNTERS 64
21 :
22 : static int64_t **per_thread_counters = NULL;
23 : static bool library_initialized = false;
24 : static int max_threads = 0;
25 :
26 : #if !defined(_OPENMP)
27 : #error "OpenMP is required. Please add -fopenmp to your C compiler flags."
28 : #endif
29 :
30 : /*******************************************************************************
31 : * \brief Initializes the DBM library.
32 : * \author Ole Schuett
33 : ******************************************************************************/
34 9288 : void dbm_library_init(void) {
35 9288 : assert(omp_get_num_threads() == 1);
36 :
37 9288 : if (library_initialized) {
38 0 : fprintf(stderr, "DBM library was already initialized.\n");
39 0 : abort();
40 : }
41 :
42 9288 : max_threads = omp_get_max_threads();
43 9288 : per_thread_counters = malloc(max_threads * sizeof(int64_t *));
44 9288 : assert(per_thread_counters != NULL);
45 :
46 : // Using parallel regions to ensure memory is allocated near a thread's core.
47 : #pragma omp parallel default(none) shared(per_thread_counters) \
48 : num_threads(max_threads)
49 : {
50 : const int ithread = omp_get_thread_num();
51 : const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t);
52 : per_thread_counters[ithread] = malloc(counters_size);
53 : assert(per_thread_counters[ithread] != NULL);
54 : memset(per_thread_counters[ithread], 0, counters_size);
55 : }
56 :
57 9288 : library_initialized = true;
58 9288 : }
59 :
60 : /*******************************************************************************
61 : * \brief Finalizes the DBM library.
62 : * \author Ole Schuett
63 : ******************************************************************************/
64 9288 : void dbm_library_finalize(void) {
65 9288 : assert(omp_get_num_threads() == 1);
66 :
67 9288 : if (!library_initialized) {
68 0 : fprintf(stderr, "Error: DBM library is not initialized.\n");
69 0 : abort();
70 : }
71 :
72 18576 : for (int i = 0; i < max_threads; i++) {
73 9288 : free(per_thread_counters[i]);
74 : }
75 9288 : free(per_thread_counters);
76 9288 : per_thread_counters = NULL;
77 :
78 9288 : offload_mempool_clear();
79 9288 : library_initialized = false;
80 9288 : }
81 :
82 : /*******************************************************************************
83 : * \brief Computes min(3, floor(log10(x))).
84 : * \author Ole Schuett
85 : ******************************************************************************/
86 83824062 : static int floorlog10(const int x) {
87 83824062 : if (x >= 1000) {
88 : return 3;
89 : }
90 83823678 : if (x >= 100) {
91 : return 2;
92 : }
93 80429183 : if (x >= 10) {
94 23174194 : return 1;
95 : }
96 : return 0;
97 : }
98 :
99 : /*******************************************************************************
100 : * \brief Add given block multiplication to stats. This routine is thread-safe.
101 : * \author Ole Schuett
102 : ******************************************************************************/
103 27941354 : void dbm_library_counter_increment(const int m, const int n, const int k) {
104 27941354 : const int ithread = omp_get_thread_num();
105 27941354 : assert(ithread < max_threads);
106 27941354 : const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k);
107 27941354 : per_thread_counters[ithread][idx]++;
108 27941354 : }
109 :
110 : /*******************************************************************************
111 : * \brief Comperator passed to qsort to compare two counters.
112 : * \author Ole Schuett
113 : ******************************************************************************/
114 1808812 : static int compare_counters(const void *a, const void *b) {
115 1808812 : return *(const int64_t *)b - *(const int64_t *)a;
116 : }
117 :
118 : /*******************************************************************************
119 : * \brief Prints statistics gathered by the DBM library.
120 : * \author Ole Schuett
121 : ******************************************************************************/
122 9406 : void dbm_library_print_stats(const int fortran_comm,
123 : void (*print_func)(const char *, int, int),
124 : const int output_unit) {
125 9406 : assert(omp_get_num_threads() == 1);
126 :
127 9406 : if (!library_initialized) {
128 0 : fprintf(stderr, "Error: DBM library is not initialized.\n");
129 0 : abort();
130 : }
131 :
132 9406 : const cp_mpi_comm_t comm = cp_mpi_comm_f2c(fortran_comm);
133 : // Sum all counters across threads and mpi ranks.
134 9406 : int64_t counters[DBM_NUM_COUNTERS][2] = {{0}};
135 9406 : double total = 0.0;
136 611390 : for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
137 601984 : counters[i][1] = i; // needed as inverse index after qsort
138 1203968 : for (int j = 0; j < max_threads; j++) {
139 601984 : counters[i][0] += per_thread_counters[j][i];
140 : }
141 601984 : cp_mpi_sum_int64(&counters[i][0], 1, comm);
142 601984 : total += counters[i][0];
143 : }
144 :
145 : // Sort counters.
146 9406 : qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters);
147 :
148 : // Determine if anything needs to be printed.
149 9406 : bool print = false;
150 589088 : for (int i = 0; i < DBM_NUM_COUNTERS && !print; i++) {
151 579682 : if (counters[i][0] != 0) {
152 354 : print = true;
153 : }
154 : }
155 9406 : if (!print) {
156 9052 : return; // nothing to be printed
157 : }
158 :
159 : // Print counters.
160 354 : DBM_LIBRARY_PRINT(print_func, "\n", output_unit);
161 354 : DBM_LIBRARY_PRINT(
162 : print_func,
163 : " ----------------------------------------------------------------"
164 : "---------------\n",
165 : output_unit);
166 354 : DBM_LIBRARY_PRINT(
167 : print_func,
168 : " - "
169 : " -\n",
170 : output_unit);
171 354 : DBM_LIBRARY_PRINT(
172 : print_func,
173 : " - DBM STATISTICS "
174 : " -\n",
175 : output_unit);
176 354 : DBM_LIBRARY_PRINT(
177 : print_func,
178 : " - "
179 : " -\n",
180 : output_unit);
181 354 : DBM_LIBRARY_PRINT(
182 : print_func,
183 : " ----------------------------------------------------------------"
184 : "---------------\n",
185 : output_unit);
186 354 : DBM_LIBRARY_PRINT(
187 : print_func,
188 : " M x N x K "
189 : "COUNT PERCENT\n",
190 : output_unit);
191 :
192 354 : const char *labels[] = {"?", "??", "???", ">999"};
193 354 : char buffer[100];
194 23010 : for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
195 22656 : if (counters[i][0] == 0) {
196 19850 : continue; // skip empty counters
197 : }
198 2806 : const double percent = 100.0 * counters[i][0] / total;
199 2806 : const int idx = counters[i][1];
200 2806 : const int m = (idx % 64) / 16;
201 2806 : const int n = (idx % 16) / 4;
202 2806 : const int k = (idx % 4) / 1;
203 2806 : snprintf(buffer, sizeof(buffer),
204 : " %4s x %4s x %4s %46" PRId64 " %10.2f%%\n", labels[m],
205 : labels[n], labels[k], counters[i][0], percent);
206 2806 : DBM_LIBRARY_PRINT(print_func, buffer, output_unit);
207 : }
208 :
209 354 : DBM_LIBRARY_PRINT(
210 : print_func,
211 : " ----------------------------------------------------------------"
212 : "---------------\n",
213 : output_unit);
214 : }
215 :
216 : // EOF
|