LCOV - code coverage report
Current view: top level - src/dbm - dbm_shard.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:c24029e) Lines: 99.3 % 141 140
Test Date: 2026-07-04 06:36:57 Functions: 100.0 % 12 12

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : #include "dbm_shard.h"
       8              : #include "../offload/offload_mempool.h"
       9              : #include "dbm_hyperparams.h"
      10              : 
      11              : #include <assert.h>
      12              : #include <omp.h>
      13              : #include <stdbool.h>
      14              : #include <stddef.h>
      15              : #include <stdlib.h>
      16              : #include <string.h>
      17              : 
      18              : /*******************************************************************************
      19              :  * \brief Internal routine for finding a power of two greater than given number.
      20              :  * \author Ole Schuett
      21              :  ******************************************************************************/
      22      8992949 : static int next_power2(const int start) {
      23      8992949 :   int candidate = 2;
      24     32429023 :   while (candidate < start) {
      25     23436074 :     candidate *= 2;
      26              :   }
      27      8992949 :   return candidate;
      28              : }
      29              : 
      30              : /*******************************************************************************
      31              :  * \brief Internal routine for finding a prime greater equal than given number.
      32              :  * \author Ole Schuett
      33              :  ******************************************************************************/
      34      8992949 : static int next_prime(const int start) {
      35      8992949 :   int candidate = start, divisor = 0;
      36     35344689 :   while (divisor < candidate) {
      37    547737015 :     for (divisor = 2; divisor < candidate; divisor++) {
      38    538744066 :       if (candidate % divisor == 0) {
      39     17358791 :         candidate++;
      40     17358791 :         break;
      41              :       }
      42              :     }
      43              :   }
      44      8992949 :   return candidate;
      45              : }
      46              : 
      47              : /*******************************************************************************
      48              :  * \brief Internal routine for initializing a shard's hashtable.
      49              :  * \author Ole Schuett
      50              :  ******************************************************************************/
      51      8992949 : static void hashtable_init(dbm_shard_t *shard) {
      52              :   // Choosing size as power of two allows to replace modulo with bitwise AND.
      53     17985898 :   shard->hashtable_size =
      54      8992949 :       next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated);
      55      8992949 :   shard->hashtable_prime = next_prime(shard->hashtable_size);
      56      8992949 :   shard->hashtable = calloc(shard->hashtable_size, sizeof(int));
      57      8992949 :   assert(shard->hashtable != NULL);
      58      8992949 : }
      59              : 
      60              : /*******************************************************************************
      61              :  * \brief Internal routine for initializing a shard.
      62              :  * \author Ole Schuett
      63              :  ******************************************************************************/
      64      1959766 : void dbm_shard_init(dbm_shard_t *shard) {
      65      1959766 :   shard->nblocks = 0;
      66      1959766 :   shard->nblocks_allocated = 0;
      67      1959766 :   shard->blocks = NULL;
      68      1959766 :   hashtable_init(shard);
      69      1959766 :   shard->data_size = 0;
      70      1959766 :   shard->data_promised = 0;
      71      1959766 :   shard->data_allocated = 0;
      72      1959766 :   shard->data = NULL;
      73      1959766 :   omp_init_lock(&shard->lock);
      74      1959766 : }
      75              : 
      76              : /*******************************************************************************
      77              :  * \brief Internal routine for copying content of shard_b into shard_a.
      78              :  * \author Ole Schuett
      79              :  ******************************************************************************/
      80       489662 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) {
      81       489662 :   assert(shard_a != NULL && shard_b != NULL);
      82              : 
      83       489662 :   if (shard_a->nblocks_allocated < shard_b->nblocks) {
      84       461859 :     free(shard_a->blocks);
      85       461859 :     shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t));
      86       461859 :     shard_a->nblocks_allocated = shard_b->nblocks;
      87       461859 :     assert(shard_a->blocks != NULL);
      88              :   }
      89       489662 :   shard_a->nblocks = shard_b->nblocks;
      90              : 
      91       489662 :   if (shard_a->hashtable_size < shard_b->hashtable_size) {
      92       464678 :     free(shard_a->hashtable);
      93       464678 :     shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int));
      94       464678 :     assert(shard_a->hashtable != NULL);
      95              :   }
      96       489662 :   shard_a->hashtable_size = shard_b->hashtable_size;
      97       489662 :   shard_a->hashtable_prime = shard_b->hashtable_prime;
      98              : 
      99       489662 :   if (shard_a->data_allocated < shard_b->data_size) {
     100       461859 :     offload_mempool_host_free(shard_a->data);
     101       923718 :     shard_a->data =
     102       461859 :         offload_mempool_host_malloc(shard_b->data_size * sizeof(double));
     103       461859 :     shard_a->data_allocated = shard_b->data_size;
     104       461859 :     assert(shard_a->data != NULL);
     105              :   }
     106       489662 :   shard_a->data_size = shard_b->data_size;
     107              : 
     108       489662 :   if (shard_b->nblocks != 0) {
     109       461889 :     assert(shard_a->blocks != NULL && shard_b->blocks != NULL);
     110       461889 :     memcpy(shard_a->blocks, shard_b->blocks,
     111       461889 :            shard_b->nblocks * sizeof(dbm_block_t));
     112              :   }
     113       489662 :   if (shard_b->hashtable_size != 0) {
     114       489662 :     assert(shard_a->hashtable != NULL && shard_b->hashtable != NULL);
     115       489662 :     memcpy(shard_a->hashtable, shard_b->hashtable,
     116       489662 :            shard_b->hashtable_size * sizeof(int));
     117              :   }
     118       489662 :   if (shard_b->data_size != 0) {
     119       461889 :     assert(shard_a->data != NULL && shard_b->data != NULL);
     120       461889 :     memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double));
     121              :   }
     122       489662 : }
     123              : 
     124              : /*******************************************************************************
     125              :  * \brief Internal routine for releasing a shard.
     126              :  * \author Ole Schuett
     127              :  ******************************************************************************/
     128      1959766 : void dbm_shard_release(dbm_shard_t *shard) {
     129      1959766 :   free(shard->blocks);
     130      1959766 :   free(shard->hashtable);
     131      1959766 :   offload_mempool_host_free(shard->data);
     132      1959766 :   omp_destroy_lock(&shard->lock);
     133      1959766 : }
     134              : 
     135              : /*******************************************************************************
     136              :  * \brief Private hash function based on Cantor pairing function.
     137              :  *        https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
     138              :  *        Szudzik's elegant pairing proved to be too asymmetric wrt. row / col.
     139              :  *        Using unsigned int to return a positive number even after overflow.
     140              :  * \author Ole Schuett
     141              :  ******************************************************************************/
     142    269986437 : static inline unsigned int hash(const unsigned int row,
     143              :                                 const unsigned int col) {
     144    269986437 :   return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap.
     145              : }
     146              : 
     147              : /*******************************************************************************
     148              :  * \brief Internal routine for masking a slot in the hash-table.
     149              :  * \author Hans Pabst
     150              :  ******************************************************************************/
     151    269986437 : static inline int hashtable_mask(const dbm_shard_t *shard) {
     152    269986437 :   return shard->hashtable_size - 1;
     153              : }
     154              : 
     155              : /*******************************************************************************
     156              :  * \brief Private routine for inserting a block into a shard's hashtable.
     157              :  * \author Ole Schuett
     158              :  ******************************************************************************/
     159    144989872 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) {
     160    144989872 :   assert(0 <= block_idx && block_idx < shard->nblocks);
     161    144989872 :   const dbm_block_t *blk = &shard->blocks[block_idx];
     162    144989872 :   const unsigned int h = hash(blk->row, blk->col);
     163              :   // Bounded probe: at most hashtable_size steps (load factor < 1 guarantees
     164              :   // termination). Avoids unbounded scans when the slot variable wrapped around
     165              :   // via the mask but the inner iteration continued past the table end.
     166    144989872 :   int slot = (shard->hashtable_prime * h) & hashtable_mask(shard);
     167    154598156 :   for (int i = 0; i < shard->hashtable_size; ++i) { // linear probing
     168    154598156 :     if (shard->hashtable[slot] == 0) {              // 0 means empty
     169    144989872 :       shard->hashtable[slot] = block_idx + 1;       // 1-based
     170    144989872 :       return;
     171              :     }
     172      9608284 :     slot = (slot + 1) & hashtable_mask(shard);
     173              :   }
     174            0 :   assert(false);
     175              : }
     176              : 
     177              : /*******************************************************************************
     178              :  * \brief Internal routine for looking up a block from a shard.
     179              :  * \author Ole Schuett
     180              :  ******************************************************************************/
     181    124996565 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row,
     182              :                               const int col) {
     183              :   // Bounded probe count prevents scanning the entire table on a miss when
     184              :   // clusters exist (previous code could re-enter via slot wrap and re-scan).
     185    124996565 :   int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard);
     186    133881917 :   for (int i = 0; i < shard->hashtable_size; ++i) { // linear probing
     187    133881917 :     const int block_idx = shard->hashtable[slot];
     188    133881917 :     if (block_idx == 0) { // 1-based, 0 means empty
     189              :       return NULL;        // block not found
     190              :     }
     191     86973562 :     assert(0 < block_idx && block_idx <= shard->nblocks);
     192     86973562 :     dbm_block_t *blk = &shard->blocks[block_idx - 1];
     193     86973562 :     if (blk->row == row && blk->col == col) {
     194              :       return blk;
     195              :     }
     196      8885352 :     slot = (slot + 1) & hashtable_mask(shard);
     197              :   }
     198              :   return NULL;
     199              : }
     200              : 
     201              : /*******************************************************************************
     202              :  * \brief Internal routine for allocating the metadata of a new block.
     203              :  * \author Ole Schuett
     204              :  ******************************************************************************/
     205     57364476 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row,
     206              :                                          const int col, const int block_size) {
     207              :   // Grow blocks array if necessary.
     208     57364476 :   if (shard->nblocks_allocated < shard->nblocks + 1) {
     209      7033183 :     shard->nblocks_allocated = DBM_ALLOCATION_FACTOR * (shard->nblocks + 1);
     210      7033183 :     assert((shard->nblocks + 1) <= shard->nblocks_allocated);
     211      7033183 :     shard->blocks =
     212      7033183 :         realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t));
     213      7033183 :     assert(shard->blocks != NULL);
     214              : 
     215              :     // rebuild hashtable
     216      7033183 :     free(shard->hashtable);
     217      7033183 :     hashtable_init(shard);
     218     94658579 :     for (int i = 0; i < shard->nblocks; i++) {
     219     87625396 :       hashtable_insert(shard, i);
     220              :     }
     221              :   }
     222              : 
     223     57364476 :   const int new_block_idx = shard->nblocks;
     224     57364476 :   shard->nblocks++;
     225     57364476 :   dbm_block_t *new_block = &shard->blocks[new_block_idx];
     226     57364476 :   new_block->row = row;
     227     57364476 :   new_block->col = col;
     228     57364476 :   new_block->offset = shard->data_promised;
     229     57364476 :   shard->data_promised += block_size;
     230              :   // The data_size will be increased after the memory is allocated and zeroed.
     231     57364476 :   hashtable_insert(shard, new_block_idx);
     232     57364476 :   return new_block;
     233              : }
     234              : 
     235              : /*******************************************************************************
     236              :  * \brief Internal routine for allocating and zeroing any promised block's data.
     237              :  * \author Ole Schuett
     238              :  ******************************************************************************/
     239     12709772 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) {
     240              :   // Reallocate data array if necessary.
     241     12709772 :   if (shard->data_allocated < shard->data_promised) {
     242      1824228 :     const double *data = shard->data;
     243      1824228 :     shard->data_allocated = DBM_ALLOCATION_FACTOR * shard->data_promised;
     244      1824228 :     assert(shard->data_promised <= shard->data_allocated);
     245      3648456 :     shard->data =
     246      1824228 :         offload_mempool_host_malloc(shard->data_allocated * sizeof(double));
     247      1824228 :     assert(shard->data != NULL);
     248      1824228 :     if (data != NULL) {
     249       571029 :       memcpy(shard->data, data, shard->data_size * sizeof(double));
     250       571029 :       offload_mempool_host_free(data);
     251              :     }
     252              :   }
     253              : 
     254              :   // Zero new blocks.
     255              :   // The following memset is usually the first touch of the memory, which leads
     256              :   // to frequent page faults. The executing thread determines the NUMA location
     257     12709772 :   if (shard->data_size < shard->data_promised) {
     258     12341152 :     const int tail = shard->data_promised - shard->data_size;
     259     12341152 :     memset(shard->data + shard->data_size, 0, tail * sizeof(double));
     260     12341152 :     shard->data_size = shard->data_promised;
     261              :   }
     262     12709772 : }
     263              : 
     264              : /*******************************************************************************
     265              :  * \brief Internal routine for getting block or promising a new one.
     266              :  * \author Ole Schuett
     267              :  ******************************************************************************/
     268     31108986 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row,
     269              :                                             const int col,
     270              :                                             const int block_size) {
     271     31108986 :   dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
     272     31108986 :   if (existing_blk != NULL) {
     273              :     return existing_blk;
     274              :   } else {
     275     28823477 :     return dbm_shard_promise_new_block(shard, row, col, block_size);
     276              :   }
     277              : }
     278              : 
     279              : /*******************************************************************************
     280              :  * \brief Internal routine for getting block or allocating a new one.
     281              :  * \author Ole Schuett
     282              :  ******************************************************************************/
     283     41640491 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row,
     284              :                                              const int col,
     285              :                                              const int block_size) {
     286     41640491 :   dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
     287     41640491 :   if (existing_blk != NULL) {
     288              :     return existing_blk;
     289              :   }
     290              : 
     291              :   // Create a new block.
     292     10856936 :   dbm_block_t *new_blk =
     293     10856936 :       dbm_shard_promise_new_block(shard, row, col, block_size);
     294     10856936 :   dbm_shard_allocate_promised_blocks(shard);
     295              : 
     296     10856936 :   return new_blk;
     297              : }
     298              : 
     299              : // EOF
        

Generated by: LCOV version 2.0-1