LCOV - code coverage report
Current view: top level - src/dbm - dbm_library.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:85b8a9b) Lines: 91.9 % 74 68
Test Date: 2026-06-14 06:48:14 Functions: 100.0 % 6 6

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : #include "dbm_library.h"
       8              : #include "../mpiwrap/cp_mpi.h"
       9              : #include "../offload/offload_library.h"
      10              : #include "../offload/offload_mempool.h"
      11              : 
      12              : #include <assert.h>
      13              : #include <inttypes.h>
      14              : #include <omp.h>
      15              : #include <stdio.h>
      16              : #include <stdlib.h>
      17              : #include <string.h>
      18              : 
      19              : #define DBM_LIBRARY_PRINT(FN, MSG, OUTPUT_UNIT)                                \
      20              :   ((FN)(MSG, (int)strlen(MSG), OUTPUT_UNIT))
      21              : #define DBM_NUM_COUNTERS 64
      22              : 
      23              : static int64_t **per_thread_counters = NULL;
      24              : static bool library_initialized = false;
      25              : static int max_threads = 0;
      26              : 
      27              : #if !defined(_OPENMP)
      28              : #error "OpenMP is required. Please add -fopenmp to your C compiler flags."
      29              : #endif
      30              : 
      31              : /*******************************************************************************
      32              :  * \brief Initializes the DBM library.
      33              :  * \author Ole Schuett
      34              :  ******************************************************************************/
      35        10328 : void dbm_library_init(void) {
      36        10328 :   assert(omp_get_num_threads() == 1);
      37              : 
      38        10328 :   if (library_initialized) {
      39            0 :     fprintf(stderr, "DBM library was already initialized.\n");
      40            0 :     abort();
      41              :   }
      42              : 
      43        10328 :   offload_init();
      44              : 
      45        10328 :   max_threads = omp_get_max_threads();
      46        10328 :   per_thread_counters = malloc(max_threads * sizeof(int64_t *));
      47        10328 :   assert(per_thread_counters != NULL);
      48              : 
      49              :   // Using parallel regions to ensure memory is allocated near a thread's core.
      50              : #pragma omp parallel default(none) shared(per_thread_counters)                 \
      51              :     num_threads(max_threads)
      52              :   {
      53              :     const int ithread = omp_get_thread_num();
      54              :     const size_t counters_size = DBM_NUM_COUNTERS * sizeof(int64_t);
      55              :     per_thread_counters[ithread] = malloc(counters_size);
      56              :     assert(per_thread_counters[ithread] != NULL);
      57              :     memset(per_thread_counters[ithread], 0, counters_size);
      58              :   }
      59              : 
      60        10328 :   library_initialized = true;
      61        10328 : }
      62              : 
      63              : /*******************************************************************************
      64              :  * \brief Finalizes the DBM library.
      65              :  * \author Ole Schuett
      66              :  ******************************************************************************/
      67        10328 : void dbm_library_finalize(void) {
      68        10328 :   assert(omp_get_num_threads() == 1);
      69              : 
      70        10328 :   if (!library_initialized) {
      71            0 :     fprintf(stderr, "Error: DBM library is not initialized.\n");
      72            0 :     abort();
      73              :   }
      74              : 
      75        20656 :   for (int i = 0; i < max_threads; i++) {
      76        10328 :     free(per_thread_counters[i]);
      77              :   }
      78        10328 :   free(per_thread_counters);
      79        10328 :   per_thread_counters = NULL;
      80              : 
      81        10328 :   offload_mempool_clear();
      82        10328 :   library_initialized = false;
      83        10328 : }
      84              : 
      85              : /*******************************************************************************
      86              :  * \brief Computes min(3, floor(log10(x))).
      87              :  * \author Ole Schuett
      88              :  ******************************************************************************/
      89     87063477 : static int floorlog10(const int x) {
      90     87063477 :   return (100 <= x ? (1000 <= x ? 3 : 2) : (10 <= x ? 1 : 0));
      91              : }
      92              : 
      93              : /*******************************************************************************
      94              :  * \brief Add given block multiplication to stats. This routine is thread-safe.
      95              :  * \author Ole Schuett
      96              :  ******************************************************************************/
      97     29021159 : void dbm_library_counter_increment(const int m, const int n, const int k) {
      98     29021159 :   const int ithread = omp_get_thread_num();
      99     29021159 :   assert(ithread < max_threads);
     100     29021159 :   const int idx = 16 * floorlog10(m) + 4 * floorlog10(n) + floorlog10(k);
     101     29021159 :   per_thread_counters[ithread][idx]++;
     102     29021159 : }
     103              : 
     104              : /*******************************************************************************
     105              :  * \brief Comperator passed to qsort to compare two counters.
     106              :  * \author Ole Schuett
     107              :  ******************************************************************************/
     108      2008588 : static int compare_counters(const void *a, const void *b) {
     109      2008588 :   return *(const int64_t *)b - *(const int64_t *)a;
     110              : }
     111              : 
     112              : /*******************************************************************************
     113              :  * \brief Prints statistics gathered by the DBM library.
     114              :  * \author Ole Schuett
     115              :  ******************************************************************************/
     116        10446 : void dbm_library_print_stats(const int fortran_comm,
     117              :                              void (*print_func)(const char *, int, int),
     118              :                              const int output_unit) {
     119        10446 :   assert(omp_get_num_threads() == 1);
     120              : 
     121        10446 :   if (!library_initialized) {
     122            0 :     fprintf(stderr, "Error: DBM library is not initialized.\n");
     123            0 :     abort();
     124              :   }
     125              : 
     126        10446 :   const cp_mpi_comm_t comm = cp_mpi_comm_f2c(fortran_comm);
     127              :   // Sum all counters across threads and mpi ranks.
     128        10446 :   int64_t counters[DBM_NUM_COUNTERS][2] = {{0}};
     129        10446 :   double total = 0.0;
     130       678990 :   for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
     131       668544 :     counters[i][1] = i; // needed as inverse index after qsort
     132      1337088 :     for (int j = 0; j < max_threads; j++) {
     133       668544 :       counters[i][0] += per_thread_counters[j][i];
     134              :     }
     135       668544 :     cp_mpi_sum_int64(&counters[i][0], 1, comm);
     136       668544 :     total += counters[i][0];
     137              :   }
     138              : 
     139              :   // Sort counters.
     140        10446 :   qsort(counters, DBM_NUM_COUNTERS, 2 * sizeof(int64_t), &compare_counters);
     141              : 
     142              :   // Determine if anything needs to be printed.
     143        10446 :   bool print = false;
     144       655680 :   for (int i = 0; i < DBM_NUM_COUNTERS && !print; i++) {
     145       645234 :     if (counters[i][0] != 0) {
     146          370 :       print = true;
     147              :     }
     148              :   }
     149        10446 :   if (!print) {
     150        10076 :     return; // nothing to be printed
     151              :   }
     152              : 
     153              :   // Print counters.
     154          370 :   DBM_LIBRARY_PRINT(print_func, "\n", output_unit);
     155          370 :   DBM_LIBRARY_PRINT(
     156              :       print_func,
     157              :       " ----------------------------------------------------------------"
     158              :       "---------------\n",
     159              :       output_unit);
     160          370 :   DBM_LIBRARY_PRINT(
     161              :       print_func,
     162              :       " -                                                               "
     163              :       "              -\n",
     164              :       output_unit);
     165          370 :   DBM_LIBRARY_PRINT(
     166              :       print_func,
     167              :       " -                                DBM STATISTICS                 "
     168              :       "              -\n",
     169              :       output_unit);
     170          370 :   DBM_LIBRARY_PRINT(
     171              :       print_func,
     172              :       " -                                                               "
     173              :       "              -\n",
     174              :       output_unit);
     175          370 :   DBM_LIBRARY_PRINT(
     176              :       print_func,
     177              :       " ----------------------------------------------------------------"
     178              :       "---------------\n",
     179              :       output_unit);
     180          370 :   DBM_LIBRARY_PRINT(
     181              :       print_func,
     182              :       "    M  x    N  x    K                                          "
     183              :       "COUNT     PERCENT\n",
     184              :       output_unit);
     185              : 
     186          370 :   const char *labels[] = {"?", "??", "???", ">999"};
     187          370 :   char buffer[100];
     188        24050 :   for (int i = 0; i < DBM_NUM_COUNTERS; i++) {
     189        23680 :     if (counters[i][0] == 0) {
     190        20764 :       continue; // skip empty counters
     191              :     }
     192         2916 :     const double percent = 100.0 * counters[i][0] / total;
     193         2916 :     const int idx = counters[i][1];
     194         2916 :     const int m = (idx % 64) / 16;
     195         2916 :     const int n = (idx % 16) / 4;
     196         2916 :     const int k = (idx % 4) / 1;
     197         2916 :     snprintf(buffer, sizeof(buffer),
     198              :              " %4s  x %4s  x %4s %46" PRId64 " %10.2f%%\n", labels[m],
     199              :              labels[n], labels[k], counters[i][0], percent);
     200         2916 :     DBM_LIBRARY_PRINT(print_func, buffer, output_unit);
     201              :   }
     202              : 
     203          370 :   DBM_LIBRARY_PRINT(
     204              :       print_func,
     205              :       " ----------------------------------------------------------------"
     206              :       "---------------\n",
     207              :       output_unit);
     208              : }
     209              : 
     210              : // EOF
        

Generated by: LCOV version 2.0-1