LCOV - code coverage report
Current view: top level - src/grid/common - grid_library.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:1a29073) Lines: 75 81 92.6 %
Date: 2024-04-17 06:30:47 Functions: 8 8 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2024 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <omp.h>
      10             : #include <stddef.h>
      11             : #include <stdio.h>
      12             : #include <stdlib.h>
      13             : #include <string.h>
      14             : 
      15             : #include "../../offload/offload_runtime.h"
      16             : #include "grid_common.h"
      17             : #include "grid_constants.h"
      18             : #include "grid_library.h"
      19             : 
      20             : // counter dimensions
      21             : #define GRID_NBACKENDS 5
      22             : #define GRID_NKERNELS 4
      23             : #define GRID_MAX_LP 20
      24             : 
      25             : typedef struct {
      26             :   grid_sphere_cache sphere_cache;
      27             :   long counters[GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP];
      28             : } grid_library_globals;
      29             : 
      30             : static grid_library_globals **per_thread_globals = NULL;
      31             : static bool library_initialized = false;
      32             : static int max_threads = 0;
      33             : static grid_library_config config = {
      34             :     .backend = GRID_BACKEND_AUTO, .validate = false, .apply_cutoff = false};
      35             : 
      36             : #if !defined(_OPENMP)
      37             : #error "OpenMP is required. Please add -fopenmp to your C compiler flags."
      38             : #endif
      39             : 
      40             : #if defined(NDEBUG)
      41             : #error                                                                         \
      42             :     "Please do not build CP2K with NDEBUG. There is no performance advantage and asserts will save your neck."
      43             : #endif
      44             : 
      45             : /*******************************************************************************
      46             :  * \brief Initializes the grid library.
      47             :  * \author Ole Schuett
      48             :  ******************************************************************************/
      49        8394 : void grid_library_init(void) {
      50        8394 :   if (library_initialized) {
      51           0 :     printf("Error: Grid library was already initialized.\n");
      52           0 :     abort();
      53             :   }
      54             : 
      55             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
      56             :   // Reserve global GPU memory for storing the intermediate Cab matrix blocks.
      57             :   // CUDA does not allow to increase this limit after a kernel was launched.
      58             :   // Unfortunately, the required memory is hard to predict because we neither
      59             :   // know which tasks will be run nor how many thread blocks the available GPU
      60             :   // can execute in parallel... 64 MiB ought to be enough for anybody ;-)
      61             :   offloadEnsureMallocHeapSize(64 * 1024 * 1024);
      62             : #endif
      63             : 
      64        8394 :   max_threads = omp_get_max_threads();
      65        8394 :   per_thread_globals = malloc(max_threads * sizeof(grid_library_globals *));
      66             : 
      67             : // Using parallel regions to ensure memory is allocated near a thread's core.
      68             : #pragma omp parallel default(none) shared(per_thread_globals)                  \
      69             :     num_threads(max_threads)
      70             :   {
      71             :     const int ithread = omp_get_thread_num();
      72             :     per_thread_globals[ithread] = malloc(sizeof(grid_library_globals));
      73             :     memset(per_thread_globals[ithread], 0, sizeof(grid_library_globals));
      74             :   }
      75             : 
      76        8394 :   library_initialized = true;
      77        8394 : }
      78             : 
      79             : /*******************************************************************************
      80             :  * \brief Finalizes the grid library.
      81             :  * \author Ole Schuett
      82             :  ******************************************************************************/
      83        8394 : void grid_library_finalize(void) {
      84        8394 :   if (!library_initialized) {
      85           0 :     printf("Error: Grid library is not initialized.\n");
      86           0 :     abort();
      87             :   }
      88             : 
      89       16788 :   for (int i = 0; i < max_threads; i++) {
      90        8394 :     grid_sphere_cache_free(&per_thread_globals[i]->sphere_cache);
      91        8394 :     free(per_thread_globals[i]);
      92             :   }
      93        8394 :   free(per_thread_globals);
      94        8394 :   per_thread_globals = NULL;
      95        8394 :   library_initialized = false;
      96        8394 : }
      97             : 
      98             : /*******************************************************************************
      99             :  * \brief Returns a pointer to the thread local sphere cache.
     100             :  * \author Ole Schuett
     101             :  ******************************************************************************/
     102   119694836 : grid_sphere_cache *grid_library_get_sphere_cache(void) {
     103   119694836 :   const int ithread = omp_get_thread_num();
     104   119694836 :   assert(ithread < max_threads);
     105   119694836 :   return &per_thread_globals[ithread]->sphere_cache;
     106             : }
     107             : 
     108             : /*******************************************************************************
     109             :  * \brief Configures the grid library.
     110             :  * \author Ole Schuett
     111             :  ******************************************************************************/
     112        8510 : void grid_library_set_config(const enum grid_backend backend,
     113             :                              const bool validate, const bool apply_cutoff) {
     114        8510 :   config.backend = backend;
     115        8510 :   config.validate = validate;
     116        8510 :   config.apply_cutoff = apply_cutoff;
     117        8510 : }
     118             : 
     119             : /*******************************************************************************
     120             :  * \brief Returns the library config.
     121             :  * \author Ole Schuett
     122             :  ******************************************************************************/
     123      383392 : grid_library_config grid_library_get_config(void) { return config; }
     124             : 
     125             : /*******************************************************************************
     126             :  * \brief Adds given increment to counter specified by lp, backend, and kernel.
     127             :  * \author Ole Schuett
     128             :  ******************************************************************************/
     129   125043140 : void grid_library_counter_add(const int lp, const enum grid_backend backend,
     130             :                               const enum grid_library_kernel kernel,
     131             :                               const int increment) {
     132   125043140 :   assert(lp >= 0);
     133   125043140 :   assert(kernel < GRID_NKERNELS);
     134   125043140 :   const int back = backend - GRID_BACKEND_REF;
     135   125043140 :   assert(back < GRID_NBACKENDS);
     136   125043140 :   const int idx = back * GRID_NKERNELS * GRID_MAX_LP + kernel * GRID_MAX_LP +
     137   125043140 :                   imin(lp, GRID_MAX_LP - 1);
     138   125043140 :   const int ithread = omp_get_thread_num();
     139   125043140 :   assert(ithread < max_threads);
     140   125043140 :   per_thread_globals[ithread]->counters[idx] += increment;
     141   125043140 : }
     142             : 
     143             : /*******************************************************************************
     144             :  * \brief Comperator passed to qsort to compare two counters.
     145             :  * \author Ole Schuett
     146             :  ******************************************************************************/
     147    14296602 : static int compare_counters(const void *a, const void *b) {
     148    14296602 :   return *(long *)b - *(long *)a;
     149             : }
     150             : 
     151             : /*******************************************************************************
     152             :  * \brief Prints statistics gathered by the grid library.
     153             :  * \author Ole Schuett
     154             :  ******************************************************************************/
     155        8512 : void grid_library_print_stats(void (*mpi_sum_func)(long *, int),
     156             :                               const int mpi_comm,
     157             :                               void (*print_func)(char *, int),
     158        8512 :                               const int output_unit) {
     159        8512 :   if (!library_initialized) {
     160           0 :     printf("Error: Grid library is not initialized.\n");
     161           0 :     abort();
     162             :   }
     163             : 
     164             :   // Sum all counters across threads and mpi ranks.
     165        8512 :   const int ncounters = GRID_NBACKENDS * GRID_NKERNELS * GRID_MAX_LP;
     166        8512 :   long counters[ncounters][2];
     167        8512 :   memset(counters, 0, ncounters * 2 * sizeof(long));
     168        8512 :   double total = 0.0;
     169     3413312 :   for (int i = 0; i < ncounters; i++) {
     170     3404800 :     counters[i][1] = i; // needed as inverse index after qsort
     171     6809600 :     for (int j = 0; j < max_threads; j++) {
     172     3404800 :       counters[i][0] += per_thread_globals[j]->counters[i];
     173             :     }
     174     3404800 :     mpi_sum_func(&counters[i][0], mpi_comm);
     175     3404800 :     total += counters[i][0];
     176             :   }
     177             : 
     178             :   // Sort counters.
     179        8512 :   qsort(counters, ncounters, 2 * sizeof(long), &compare_counters);
     180             : 
     181             :   // Print counters.
     182        8512 :   print_func("\n", output_unit);
     183        8512 :   print_func(" ----------------------------------------------------------------"
     184             :              "---------------\n",
     185             :              output_unit);
     186        8512 :   print_func(" -                                                               "
     187             :              "              -\n",
     188             :              output_unit);
     189        8512 :   print_func(" -                                GRID STATISTICS                "
     190             :              "              -\n",
     191             :              output_unit);
     192        8512 :   print_func(" -                                                               "
     193             :              "              -\n",
     194             :              output_unit);
     195        8512 :   print_func(" ----------------------------------------------------------------"
     196             :              "---------------\n",
     197             :              output_unit);
     198        8512 :   print_func(" LP    KERNEL             BACKEND                              "
     199             :              "COUNT     PERCENT\n",
     200             :              output_unit);
     201             : 
     202        8512 :   const char *kernel_names[] = {"collocate ortho", "integrate ortho",
     203             :                                 "collocate general", "integrate general"};
     204        8512 :   const char *backend_names[] = {"REF", "CPU", "DGEMM", "GPU", "HIP"};
     205             : 
     206     3413312 :   for (int i = 0; i < ncounters; i++) {
     207     3404800 :     if (counters[i][0] == 0)
     208     3355778 :       continue; // skip empty counters
     209       49022 :     const double percent = 100.0 * counters[i][0] / total;
     210       49022 :     const int idx = counters[i][1];
     211       49022 :     const int backend_stride = GRID_NKERNELS * GRID_MAX_LP;
     212       49022 :     const int back = idx / backend_stride;
     213       49022 :     const int kern = (idx % backend_stride) / GRID_MAX_LP;
     214       49022 :     const int lp = (idx % backend_stride) % GRID_MAX_LP;
     215       49022 :     char buffer[100];
     216       49022 :     snprintf(buffer, sizeof(buffer), " %-5i %-17s  %-6s  %34li %10.2f%%\n", lp,
     217             :              kernel_names[kern], backend_names[back], counters[i][0], percent);
     218       49022 :     print_func(buffer, output_unit);
     219             :   }
     220             : 
     221        8512 :   print_func(" ----------------------------------------------------------------"
     222             :              "---------------\n",
     223             :              output_unit);
     224        8512 : }
     225             : 
     226             : // EOF

Generated by: LCOV version 1.15