LCOV - code coverage report
Current view: top level - src/grid/common - grid_library.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 93.2 % 88 82
Test Date: 2025-12-04 06:27:48 Functions: 100.0 % 8 8

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : #include "grid_library.h"
       8              : #include "grid_common.h"
       9              : #include "grid_constants.h"
      10              : 
      11              : #include "../../mpiwrap/cp_mpi.h"
      12              : #include "../../offload/offload_runtime.h"
      13              : 
      14              : #include <assert.h>
      15              : #include <omp.h>
      16              : #include <stddef.h>
      17              : #include <stdio.h>
      18              : #include <stdlib.h>
      19              : #include <string.h>
      20              : 
      21              : #define GRID_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT)                               \
      22              :   ((FN)(MSG, (int)strlen(MSG), OUTPUT_UNIT))
      23              : 
// counter dimensions
// The counters array below has one slot per (backend, kernel, lp) triple, so
// its length is the product of these three constants. Values of lp at or above
// GRID_MAX_LP - 1 all share the last lp slot (see grid_library_counter_add).
#define GRID_NBACKENDS 5
#define GRID_NKERNELS 4
#define GRID_MAX_LP 20

// State that exists once per OpenMP thread: a sphere cache and usage counters.
typedef struct {
  grid_sphere_cache sphere_cache;
  // Flattened table indexed as
  // backend * GRID_NKERNELS * GRID_MAX_LP + kernel * GRID_MAX_LP + lp.
  long counters[GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP];
} grid_library_globals;
      33              : 
// One heap-allocated grid_library_globals per OpenMP thread, indexed by
// omp_get_thread_num(). NULL before grid_library_init and after
// grid_library_finalize.
static grid_library_globals **per_thread_globals = NULL;
// Set by grid_library_init, cleared by grid_library_finalize.
static bool library_initialized = false;
// Number of entries in per_thread_globals; taken from omp_get_max_threads()
// when the library is initialized.
static int max_threads = 0;
// Library-wide settings; overwritten by grid_library_set_config.
static grid_library_config config = {
    .backend = GRID_BACKEND_AUTO, .validate = false, .apply_cutoff = false};
      39              : 
// This file calls OpenMP runtime functions (omp_get_max_threads,
// omp_get_thread_num) unconditionally, so refuse to build without OpenMP.
#if !defined(_OPENMP)
#error "OpenMP is required. Please add -fopenmp to your C compiler flags."
#endif

// Allocation results and index bounds in this file are checked with assert(),
// which NDEBUG would compile away.
#if defined(NDEBUG)
#error                                                                         \
    "Please do not build CP2K with NDEBUG. There is no performance advantage and asserts will save your neck."
#endif
      48              : 
      49              : /*******************************************************************************
      50              :  * \brief Initializes the grid library.
      51              :  * \author Ole Schuett
      52              :  ******************************************************************************/
      53         9286 : void grid_library_init(void) {
      54         9286 :   if (library_initialized) {
      55            0 :     printf("Error: Grid library was already initialized.\n");
      56            0 :     abort();
      57              :   }
      58              : 
      59              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
      60              :   // Reserve global GPU memory for storing the intermediate Cab matrix blocks.
      61              :   // CUDA does not allow to increase this limit after a kernel was launched.
      62              :   // Unfortunately, the required memory is hard to predict because we neither
      63              :   // know which tasks will be run nor how many thread blocks the available GPU
      64              :   // can execute in parallel... 64 MiB ought to be enough for anybody ;-)
      65              :   offloadEnsureMallocHeapSize(64 * 1024 * 1024);
      66              : #endif
      67              : 
      68         9286 :   max_threads = omp_get_max_threads();
      69         9286 :   per_thread_globals = malloc(max_threads * sizeof(grid_library_globals *));
      70         9286 :   assert(per_thread_globals != NULL);
      71              : 
      72              : // Using parallel regions to ensure memory is allocated near a thread's core.
      73              : #pragma omp parallel default(none) shared(per_thread_globals)                  \
      74              :     num_threads(max_threads)
      75              :   {
      76              :     const int ithread = omp_get_thread_num();
      77              :     per_thread_globals[ithread] = malloc(sizeof(grid_library_globals));
      78              :     assert(per_thread_globals[ithread] != NULL);
      79              :     memset(per_thread_globals[ithread], 0, sizeof(grid_library_globals));
      80              :   }
      81              : 
      82         9286 :   library_initialized = true;
      83         9286 : }
      84              : 
      85              : /*******************************************************************************
      86              :  * \brief Finalizes the grid library.
      87              :  * \author Ole Schuett
      88              :  ******************************************************************************/
      89         9286 : void grid_library_finalize(void) {
      90         9286 :   if (!library_initialized) {
      91            0 :     printf("Error: Grid library is not initialized.\n");
      92            0 :     abort();
      93              :   }
      94              : 
      95        18572 :   for (int i = 0; i < max_threads; i++) {
      96         9286 :     grid_sphere_cache_free(&per_thread_globals[i]->sphere_cache);
      97         9286 :     free(per_thread_globals[i]);
      98              :   }
      99         9286 :   free(per_thread_globals);
     100         9286 :   per_thread_globals = NULL;
     101         9286 :   library_initialized = false;
     102         9286 : }
     103              : 
     104              : /*******************************************************************************
     105              :  * \brief Returns a pointer to the thread local sphere cache.
     106              :  * \author Ole Schuett
     107              :  ******************************************************************************/
     108    128252625 : grid_sphere_cache *grid_library_get_sphere_cache(void) {
     109    128252625 :   const int ithread = omp_get_thread_num();
     110    128252625 :   assert(ithread < max_threads);
     111    128252625 :   return &per_thread_globals[ithread]->sphere_cache;
     112              : }
     113              : 
     114              : /*******************************************************************************
     115              :  * \brief Configures the grid library.
     116              :  * \author Ole Schuett
     117              :  ******************************************************************************/
     118         9402 : void grid_library_set_config(const enum grid_backend backend,
     119              :                              const bool validate, const bool apply_cutoff) {
     120         9402 :   config.backend = backend;
     121         9402 :   config.validate = validate;
     122         9402 :   config.apply_cutoff = apply_cutoff;
     123         9402 : }
     124              : 
     125              : /*******************************************************************************
     126              :  * \brief Returns the library config.
     127              :  * \author Ole Schuett
     128              :  ******************************************************************************/
     129       413238 : grid_library_config grid_library_get_config(void) { return config; }
     130              : 
     131              : /*******************************************************************************
     132              :  * \brief Adds given increment to counter specified by lp, backend, and kernel.
     133              :  * \author Ole Schuett
     134              :  ******************************************************************************/
     135    133992006 : void grid_library_counter_add(const int lp, const enum grid_backend backend,
     136              :                               const enum grid_library_kernel kernel,
     137              :                               const int increment) {
     138    133992006 :   assert(lp >= 0);
     139    133992006 :   assert(kernel < GRID_NKERNELS);
     140    133992006 :   const int back = backend - GRID_BACKEND_REF;
     141    133992006 :   assert(back < GRID_NBACKENDS);
     142    133992006 :   const int idx = back * GRID_NKERNELS * GRID_MAX_LP + kernel * GRID_MAX_LP +
     143    133992006 :                   imin(lp, GRID_MAX_LP - 1);
     144    133992006 :   const int ithread = omp_get_thread_num();
     145    133992006 :   assert(ithread < max_threads);
     146    133992006 :   per_thread_globals[ithread]->counters[idx] += increment;
     147    133992006 : }
     148              : 
     149              : /*******************************************************************************
     150              :  * \brief Comperator passed to qsort to compare two counters.
     151              :  * \author Ole Schuett
     152              :  ******************************************************************************/
     153     15786840 : static int compare_counters(const void *a, const void *b) {
     154     15786840 :   return *(long *)b - *(long *)a;
     155              : }
     156              : 
     157              : /*******************************************************************************
     158              :  * \brief Prints statistics gathered by the grid library.
     159              :  * \author Ole Schuett
     160              :  ******************************************************************************/
     161         9404 : void grid_library_print_stats(const int fortran_comm,
     162              :                               void (*print_func)(const char *, int, int),
     163         9404 :                               const int output_unit) {
     164         9404 :   if (!library_initialized) {
     165            0 :     printf("Error: Grid library is not initialized.\n");
     166            0 :     abort();
     167              :   }
     168              : 
     169         9404 :   const cp_mpi_comm_t comm = cp_mpi_comm_f2c(fortran_comm);
     170              :   // Sum all counters across threads and mpi ranks.
     171         9404 :   const int ncounters = GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP;
     172         9404 :   long counters[ncounters][2];
     173         9404 :   memset(counters, 0, ncounters * 2 * sizeof(long));
     174         9404 :   double total = 0.0;
     175      3771004 :   for (int i = 0; i < ncounters; i++) {
     176      3761600 :     counters[i][1] = i; // needed as inverse index after qsort
     177      7523200 :     for (int j = 0; j < max_threads; j++) {
     178      3761600 :       counters[i][0] += per_thread_globals[j]->counters[i];
     179              :     }
     180      3761600 :     cp_mpi_sum_long(&counters[i][0], 1, comm);
     181      3761600 :     total += counters[i][0];
     182              :   }
     183              : 
     184              :   // Sort counters.
     185         9404 :   qsort(counters, ncounters, 2 * sizeof(long), &compare_counters);
     186              : 
     187              :   // Determine if anything needs to be printed.
     188         9404 :   bool print = false;
     189      1805530 :   for (int i = 0; i < ncounters && !print; i++) {
     190      1796126 :     if (counters[i][0] != 0) {
     191         4926 :       print = true;
     192              :     }
     193              :   }
     194         9404 :   if (!print) {
     195         4478 :     return; // nothing to be printed
     196              :   }
     197              : 
     198              :   // Print counters.
     199         4926 :   GRID_LIBRARY_PRINT(print_func, "\n", output_unit);
     200         4926 :   GRID_LIBRARY_PRINT(
     201              :       print_func,
     202              :       " ----------------------------------------------------------------"
     203              :       "---------------\n",
     204              :       output_unit);
     205         4926 :   GRID_LIBRARY_PRINT(
     206              :       print_func,
     207              :       " -                                                               "
     208              :       "              -\n",
     209              :       output_unit);
     210         4926 :   GRID_LIBRARY_PRINT(
     211              :       print_func,
     212              :       " -                                GRID STATISTICS                "
     213              :       "              -\n",
     214              :       output_unit);
     215         4926 :   GRID_LIBRARY_PRINT(
     216              :       print_func,
     217              :       " -                                                               "
     218              :       "              -\n",
     219              :       output_unit);
     220         4926 :   GRID_LIBRARY_PRINT(
     221              :       print_func,
     222              :       " ----------------------------------------------------------------"
     223              :       "---------------\n",
     224              :       output_unit);
     225         4926 :   GRID_LIBRARY_PRINT(
     226              :       print_func,
     227              :       " LP    KERNEL             BACKEND                              "
     228              :       "COUNT     PERCENT\n",
     229              :       output_unit);
     230              : 
     231         4926 :   const char *kernel_names[] = {"collocate ortho", "integrate ortho",
     232              :                                 "collocate general", "integrate general"};
     233         4926 :   const char *backend_names[] = {"REF", "CPU", "DGEMM", "GPU", "HIP"};
     234              : 
     235      1975326 :   for (int i = 0; i < ncounters; i++) {
     236      1970400 :     if (counters[i][0] == 0)
     237      1919088 :       continue; // skip empty counters
     238        51312 :     const double percent = 100.0 * counters[i][0] / total;
     239        51312 :     const int idx = counters[i][1];
     240        51312 :     const int backend_stride = GRID_NKERNELS * GRID_MAX_LP;
     241        51312 :     const int back = idx / backend_stride;
     242        51312 :     const int kern = (idx % backend_stride) / GRID_MAX_LP;
     243        51312 :     const int lp = (idx % backend_stride) % GRID_MAX_LP;
     244        51312 :     char buffer[100];
     245        51312 :     snprintf(buffer, sizeof(buffer), " %-5i %-17s  %-6s  %34li %10.2f%%\n", lp,
     246              :              kernel_names[kern], backend_names[back], counters[i][0], percent);
     247        51312 :     GRID_LIBRARY_PRINT(print_func, buffer, output_unit);
     248              :   }
     249              : 
     250         4926 :   GRID_LIBRARY_PRINT(
     251              :       print_func,
     252              :       " ----------------------------------------------------------------"
     253              :       "---------------\n",
     254              :       output_unit);
     255              : }
     256              : 
     257              : // EOF
        

Generated by: LCOV version 2.0-1