Line data Source code
1 : /*----------------------------------------------------------------------------*/ 2 : /* CP2K: A general program to perform molecular dynamics simulations */ 3 : /* Copyright 2000-2023 CP2K developers group <https://cp2k.org> */ 4 : /* */ 5 : /* SPDX-License-Identifier: BSD-3-Clause */ 6 : /*----------------------------------------------------------------------------*/ 7 : 8 : #include <assert.h> 9 : #include <omp.h> 10 : #include <stddef.h> 11 : #include <stdio.h> 12 : #include <stdlib.h> 13 : #include <string.h> 14 : 15 : #include "grid_common.h" 16 : #include "grid_constants.h" 17 : #include "grid_library.h" 18 : 19 : // counter dimensions 20 : #define GRID_NBACKENDS 4 21 : #define GRID_NKERNELS 4 22 : #define GRID_MAX_LP 20 23 : 24 : typedef struct { 25 : grid_sphere_cache sphere_cache; 26 : long counters[GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP]; 27 : } grid_library_globals; 28 : 29 : static grid_library_globals **per_thread_globals = NULL; 30 : static bool library_initialized = false; 31 : static int max_threads = 0; 32 : static grid_library_config config = { 33 : .backend = GRID_BACKEND_AUTO, .validate = false, .apply_cutoff = false}; 34 : 35 : #if !defined(_OPENMP) 36 : #error "OpenMP is required. Please add -fopenmp to your C compiler flags." 37 : #endif 38 : 39 : /******************************************************************************* 40 : * \brief Initializes the grid library. 41 : * \author Ole Schuett 42 : ******************************************************************************/ 43 8038 : void grid_library_init(void) { 44 8038 : if (library_initialized) { 45 0 : printf("Error: Grid library was already initialized.\n"); 46 0 : abort(); 47 : } 48 : 49 8038 : max_threads = omp_get_max_threads(); 50 8038 : per_thread_globals = malloc(max_threads * sizeof(grid_library_globals *)); 51 : 52 : // Using parallel regions to ensure memory is allocated near a thread's core. 53 : #pragma omp parallel default(none) shared(per_thread_globals) \ 54 : num_threads(max_threads) 55 : { 56 : const int ithread = omp_get_thread_num(); 57 : per_thread_globals[ithread] = malloc(sizeof(grid_library_globals)); 58 : memset(per_thread_globals[ithread], 0, sizeof(grid_library_globals)); 59 : } 60 : 61 8038 : library_initialized = true; 62 8038 : } 63 : 64 : /******************************************************************************* 65 : * \brief Finalizes the grid library. 66 : * \author Ole Schuett 67 : ******************************************************************************/ 68 8038 : void grid_library_finalize(void) { 69 8038 : if (!library_initialized) { 70 0 : printf("Error: Grid library is not initialized.\n"); 71 0 : abort(); 72 : } 73 : 74 16076 : for (int i = 0; i < max_threads; i++) { 75 8038 : grid_sphere_cache_free(&per_thread_globals[i]->sphere_cache); 76 8038 : free(per_thread_globals[i]); 77 : } 78 8038 : free(per_thread_globals); 79 8038 : per_thread_globals = NULL; 80 8038 : library_initialized = false; 81 8038 : } 82 : 83 : /******************************************************************************* 84 : * \brief Returns a pointer to the thread local sphere cache. 85 : * \author Ole Schuett 86 : ******************************************************************************/ 87 117986614 : grid_sphere_cache *grid_library_get_sphere_cache(void) { 88 117986614 : const int ithread = omp_get_thread_num(); 89 117986614 : assert(ithread < max_threads); 90 117986614 : return &per_thread_globals[ithread]->sphere_cache; 91 : } 92 : 93 : /******************************************************************************* 94 : * \brief Configures the grid library. 95 : * \author Ole Schuett 96 : ******************************************************************************/ 97 8154 : void grid_library_set_config(const enum grid_backend backend, 98 : const bool validate, const bool apply_cutoff) { 99 8154 : config.backend = backend; 100 8154 : config.validate = validate; 101 8154 : config.apply_cutoff = apply_cutoff; 102 8154 : } 103 : 104 : /******************************************************************************* 105 : * \brief Returns the library config. 106 : * \author Ole Schuett 107 : ******************************************************************************/ 108 370580 : grid_library_config grid_library_get_config(void) { return config; } 109 : 110 : /******************************************************************************* 111 : * \brief Adds given increment to counter specified by lp, backend, and kernel. 112 : * \author Ole Schuett 113 : ******************************************************************************/ 114 123317911 : void grid_library_counter_add(const int lp, const enum grid_backend backend, 115 : const enum grid_library_kernel kernel, 116 : const int increment) { 117 123317911 : assert(lp >= 0); 118 123317911 : assert(kernel < GRID_NKERNELS); 119 123317911 : const int back = backend - GRID_BACKEND_REF; 120 123317911 : assert(back < GRID_NBACKENDS); 121 123317911 : const int idx = back * GRID_NKERNELS * GRID_MAX_LP + kernel * GRID_MAX_LP + 122 123317911 : imin(lp, GRID_MAX_LP - 1); 123 123317911 : const int ithread = omp_get_thread_num(); 124 123317911 : assert(ithread < max_threads); 125 123317911 : per_thread_globals[ithread]->counters[idx] += increment; 126 123317911 : } 127 : 128 : /******************************************************************************* 129 : * \brief Comperator passed to qsort to compare two counters. 130 : * \author Ole Schuett 131 : ******************************************************************************/ 132 10483014 : static int compare_counters(const void *a, const void *b) { 133 10483014 : return *(long *)b - *(long *)a; 134 : } 135 : 136 : /******************************************************************************* 137 : * \brief Prints statistics gathered by the grid library. 138 : * \author Ole Schuett 139 : ******************************************************************************/ 140 8156 : void grid_library_print_stats(void (*mpi_sum_func)(long *, int), 141 : const int mpi_comm, 142 : void (*print_func)(char *, int), 143 8156 : const int output_unit) { 144 8156 : if (!library_initialized) { 145 0 : printf("Error: Grid library is not initialized.\n"); 146 0 : abort(); 147 : } 148 : 149 : // Sum all counters across threads and mpi ranks. 150 8156 : const int ncounters = GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP; 151 8156 : long counters[ncounters][2]; 152 8156 : memset(counters, 0, ncounters * 2 * sizeof(long)); 153 8156 : double total = 0.0; 154 2618076 : for (int i = 0; i < ncounters; i++) { 155 2609920 : counters[i][1] = i; // needed as inverse index after qsort 156 5219840 : for (int j = 0; j < max_threads; j++) { 157 2609920 : counters[i][0] += per_thread_globals[j]->counters[i]; 158 : } 159 2609920 : mpi_sum_func(&counters[i][0], mpi_comm); 160 2609920 : total += counters[i][0]; 161 : } 162 : 163 : // Sort counters. 164 8156 : qsort(counters, ncounters, 2 * sizeof(long), &compare_counters); 165 : 166 : // Print counters. 167 8156 : print_func("\n", output_unit); 168 8156 : print_func(" ----------------------------------------------------------------" 169 : "---------------\n", 170 : output_unit); 171 8156 : print_func(" - " 172 : " -\n", 173 : output_unit); 174 8156 : print_func(" - GRID STATISTICS " 175 : " -\n", 176 : output_unit); 177 8156 : print_func(" - " 178 : " -\n", 179 : output_unit); 180 8156 : print_func(" ----------------------------------------------------------------" 181 : "---------------\n", 182 : output_unit); 183 8156 : print_func(" LP KERNEL BACKEND " 184 : "COUNT PERCENT\n", 185 : output_unit); 186 : 187 8156 : const char *kernel_names[] = {"collocate ortho", "integrate ortho", 188 : "collocate general", "integrate general"}; 189 8156 : const char *backend_names[] = {"REF", "CPU", "GPU", "HIP"}; 190 : 191 2618076 : for (int i = 0; i < ncounters; i++) { 192 2609920 : if (counters[i][0] == 0) 193 2564276 : continue; // skip empty counters 194 45644 : const double percent = 100.0 * counters[i][0] / total; 195 45644 : const int idx = counters[i][1]; 196 45644 : const int backend_stride = GRID_NKERNELS * GRID_MAX_LP; 197 45644 : const int back = idx / backend_stride; 198 45644 : const int kern = (idx % backend_stride) / GRID_MAX_LP; 199 45644 : const int lp = (idx % backend_stride) % GRID_MAX_LP; 200 45644 : char buffer[100]; 201 45644 : snprintf(buffer, sizeof(buffer), " %-5i %-17s %-6s %34li %10.2f%%\n", lp, 202 : kernel_names[kern], backend_names[back], counters[i][0], percent); 203 45644 : print_func(buffer, output_unit); 204 : } 205 : 206 8156 : print_func(" ----------------------------------------------------------------" 207 : "---------------\n", 208 : output_unit); 209 8156 : } 210 : 211 : // EOF