Line data Source code
1 : /*----------------------------------------------------------------------------*/ 2 : /* CP2K: A general program to perform molecular dynamics simulations */ 3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */ 4 : /* */ 5 : /* SPDX-License-Identifier: BSD-3-Clause */ 6 : /*----------------------------------------------------------------------------*/ 7 : 8 : #include <assert.h> 9 : #include <inttypes.h> 10 : #include <omp.h> 11 : #include <stdbool.h> 12 : #include <stdio.h> 13 : #include <stdlib.h> 14 : #include <string.h> 15 : 16 : #include "dbm_library.h" 17 : #include "dbm_mempool.h" 18 : #include "dbm_mpi.h" 19 : 20 : #define DBM_NUM_COUNTERS 64 21 : 22 : static int64_t **per_thread_counters = NULL; 23 : static bool library_initialized = false; 24 : static int max_threads = 0; 25 : 26 : #if !defined(_OPENMP) 27 : #error "OpenMP is required. Please add -fopenmp to your C compiler flags." 28 : #endif 29 : 30 : /******************************************************************************* 31 : * \brief Initializes the DBM library. 32 : * \author Ole Schuett 33 : ******************************************************************************/ 34 8396 : void dbm_library_init(void) { 35 8396 : assert(omp_get_num_threads() == 1); 36 : 37 8396 : if (library_initialized) { 38 0 : fprintf(stderr, "DBM library was already initialized.\n"); 39 0 : abort(); 40 : } 41 : 42 8396 : max_threads = omp_get_max_threads(); 43 8396 : per_thread_counters = malloc(max_threads * sizeof(int64_t *)); 44 : 45 : // Using parallel regions to ensure memory is allocated near a thread's core. 46 : #pragma omp parallel default(none) shared(per_thread_counters) \ 47 : num_threads(max_threads) 48 : { 49 : const int ithread = omp_get_thread_num(); 50 : const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t); 51 : per_thread_counters[ithread] = malloc(counters_size); 52 : memset(per_thread_counters[ithread], 0, counters_size); 53 : } 54 : 55 8396 : library_initialized = true; 56 8396 : } 57 : 58 : /******************************************************************************* 59 : * \brief Finalizes the DBM library. 60 : * \author Ole Schuett 61 : ******************************************************************************/ 62 8396 : void dbm_library_finalize(void) { 63 8396 : assert(omp_get_num_threads() == 1); 64 : 65 8396 : if (!library_initialized) { 66 0 : fprintf(stderr, "Error: DBM library is not initialized.\n"); 67 0 : abort(); 68 : } 69 : 70 16792 : for (int i = 0; i < max_threads; i++) { 71 8396 : free(per_thread_counters[i]); 72 : } 73 8396 : free(per_thread_counters); 74 8396 : per_thread_counters = NULL; 75 : 76 8396 : dbm_mempool_clear(); 77 8396 : library_initialized = false; 78 8396 : } 79 : 80 : /******************************************************************************* 81 : * \brief Computes min(3, floor(log10(x))). 82 : * \author Ole Schuett 83 : ******************************************************************************/ 84 62430864 : static int floorlog10(const int x) { 85 62430864 : if (x >= 1000) { 86 : return 3; 87 : } 88 62430480 : if (x >= 100) { 89 : return 2; 90 : } 91 59834827 : if (x >= 10) { 92 18060722 : return 1; 93 : } 94 : return 0; 95 : } 96 : 97 : /******************************************************************************* 98 : * \brief Add given block multiplication to stats. This routine is thread-safe. 99 : * \author Ole Schuett 100 : ******************************************************************************/ 101 20810288 : void dbm_library_counter_increment(const int m, const int n, const int k) { 102 20810288 : const int ithread = omp_get_thread_num(); 103 20810288 : assert(ithread < max_threads); 104 20810288 : const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k); 105 20810288 : per_thread_counters[ithread][idx]++; 106 20810288 : } 107 : 108 : /******************************************************************************* 109 : * \brief Comperator passed to qsort to compare two counters. 110 : * \author Ole Schuett 111 : ******************************************************************************/ 112 1637116 : static int compare_counters(const void *a, const void *b) { 113 1637116 : return *(const int64_t *)b - *(const int64_t *)a; 114 : } 115 : 116 : /******************************************************************************* 117 : * \brief Prints statistics gathered by the DBM library. 118 : * \author Ole Schuett 119 : ******************************************************************************/ 120 8514 : void dbm_library_print_stats(const int fortran_comm, 121 : void (*print_func)(char *, int), 122 : const int output_unit) { 123 8514 : assert(omp_get_num_threads() == 1); 124 : 125 8514 : if (!library_initialized) { 126 0 : fprintf(stderr, "Error: DBM library is not initialized.\n"); 127 0 : abort(); 128 : } 129 : 130 8514 : const dbm_mpi_comm_t comm = dbm_mpi_comm_f2c(fortran_comm); 131 : // Sum all counters across threads and mpi ranks. 132 8514 : int64_t counters[DBM_NUM_COUNTERS][2]; 133 8514 : memset(counters, 0, DBM_NUM_COUNTERS * 2 * sizeof(int64_t)); 134 8514 : double total = 0.0; 135 553410 : for (int i = 0; i < DBM_NUM_COUNTERS; i++) { 136 544896 : counters[i][1] = i; // needed as inverse index after qsort 137 1089792 : for (int j = 0; j < max_threads; j++) { 138 544896 : counters[i][0] += per_thread_counters[j][i]; 139 : } 140 544896 : dbm_mpi_sum_int64(&counters[i][0], 1, comm); 141 544896 : total += counters[i][0]; 142 : } 143 : 144 : // Sort counters. 145 8514 : qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters); 146 : 147 : // Print counters. 148 8514 : print_func("\n", output_unit); 149 8514 : print_func(" ----------------------------------------------------------------" 150 : "---------------\n", 151 : output_unit); 152 8514 : print_func(" - " 153 : " -\n", 154 : output_unit); 155 8514 : print_func(" - DBM STATISTICS " 156 : " -\n", 157 : output_unit); 158 8514 : print_func(" - " 159 : " -\n", 160 : output_unit); 161 8514 : print_func(" ----------------------------------------------------------------" 162 : "---------------\n", 163 : output_unit); 164 8514 : print_func(" M x N x K " 165 : "COUNT PERCENT\n", 166 : output_unit); 167 : 168 8514 : const char *labels[] = {"?", "??", "???", ">999"}; 169 553410 : for (int i = 0; i < DBM_NUM_COUNTERS; i++) { 170 544896 : if (counters[i][0] == 0) { 171 542470 : continue; // skip empty counters 172 : } 173 2426 : const double percent = 100.0 * counters[i][0] / total; 174 2426 : const int idx = counters[i][1]; 175 2426 : const int m = (idx % 64) / 16; 176 2426 : const int n = (idx % 16) / 4; 177 2426 : const int k = (idx % 4) / 1; 178 2426 : char buffer[100]; 179 2426 : snprintf(buffer, sizeof(buffer), 180 : " %4s x %4s x %4s %46" PRId64 " %10.2f%%\n", labels[m], 181 : labels[n], labels[k], counters[i][0], percent); 182 2426 : print_func(buffer, output_unit); 183 : } 184 : 185 8514 : print_func(" ----------------------------------------------------------------" 186 : "---------------\n", 187 : output_unit); 188 8514 : } 189 : 190 : // EOF