LCOV - code coverage report
Current view: top level - src/dbm - dbm_shard.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:a2cdc02) Lines: 136 136 100.0 %
Date: 2025-04-17 08:15:26 Functions: 12 12 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : #include <assert.h>
       8             : #include <omp.h>
       9             : #include <stdbool.h>
      10             : #include <stddef.h>
      11             : #include <stdlib.h>
      12             : #include <string.h>
      13             : 
      14             : #include "dbm_hyperparams.h"
      15             : #include "dbm_mempool.h"
      16             : #include "dbm_shard.h"
      17             : 
      18             : /*******************************************************************************
      19             :  * \brief Internal routine for finding a power of two greater than given number.
      20             :  * \author Ole Schuett
      21             :  ******************************************************************************/
      22     6604525 : static int next_power2(const int start) {
      23     6604525 :   int candidate = 2;
      24    23684473 :   while (candidate < start) {
      25    17079948 :     candidate *= 2;
      26             :   }
      27     6604525 :   return candidate;
      28             : }
      29             : 
      30             : /*******************************************************************************
      31             :  * \brief Internal routine for finding a prime greater equal than given number.
      32             :  * \author Ole Schuett
      33             :  ******************************************************************************/
      34     6604525 : static int next_prime(const int start) {
      35     6604525 :   int candidate = start, divisor = 0;
      36    25974886 :   while (divisor < candidate) {
      37   440490010 :     for (divisor = 2; divisor < candidate; divisor++) {
      38   433885485 :       if (candidate % divisor == 0) {
      39    12765836 :         candidate++;
      40    12765836 :         break;
      41             :       }
      42             :     }
      43             :   }
      44     6604525 :   return candidate;
      45             : }
      46             : 
      47             : /*******************************************************************************
      48             :  * \brief Internal routine for initializing a shard's hashtable.
      49             :  * \author Ole Schuett
      50             :  ******************************************************************************/
      51     6604525 : static void hashtable_init(dbm_shard_t *shard) {
      52             :   // Choosing size as power of two allows to replace modulo with bitwise AND.
      53    13209050 :   shard->hashtable_size =
      54     6604525 :       next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated);
      55     6604525 :   shard->hashtable_prime = next_prime(shard->hashtable_size);
      56     6604525 :   shard->hashtable = calloc(shard->hashtable_size, sizeof(int));
      57     6604525 :   assert(shard->hashtable != NULL);
      58     6604525 : }
      59             : 
      60             : /*******************************************************************************
      61             :  * \brief Internal routine for initializing a shard.
      62             :  * \author Ole Schuett
      63             :  ******************************************************************************/
      64     1615183 : void dbm_shard_init(dbm_shard_t *shard) {
      65     1615183 :   shard->nblocks = 0;
      66     1615183 :   shard->nblocks_allocated = 0;
      67     1615183 :   shard->blocks = NULL;
      68     1615183 :   hashtable_init(shard);
      69     1615183 :   shard->data_size = 0;
      70     1615183 :   shard->data_promised = 0;
      71     1615183 :   shard->data_allocated = 0;
      72     1615183 :   shard->data = NULL;
      73     1615183 :   omp_init_lock(&shard->lock);
      74     1615183 : }
      75             : 
      76             : /*******************************************************************************
      77             :  * \brief Internal routine for copying content of shard_b into shard_a.
      78             :  * \author Ole Schuett
      79             :  ******************************************************************************/
      80      399384 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) {
      81      399384 :   assert(shard_a != NULL && shard_b != NULL);
      82             : 
      83      399384 :   if (shard_a->nblocks_allocated < shard_b->nblocks) {
      84      372084 :     free(shard_a->blocks);
      85      372084 :     shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t));
      86      372084 :     shard_a->nblocks_allocated = shard_b->nblocks;
      87      372084 :     assert(shard_a->blocks != NULL);
      88             :   }
      89      399384 :   shard_a->nblocks = shard_b->nblocks;
      90             : 
      91      399384 :   if (shard_a->hashtable_size < shard_b->hashtable_size) {
      92      374887 :     free(shard_a->hashtable);
      93      374887 :     shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int));
      94      374887 :     assert(shard_a->hashtable != NULL);
      95             :   }
      96      399384 :   shard_a->hashtable_size = shard_b->hashtable_size;
      97      399384 :   shard_a->hashtable_prime = shard_b->hashtable_prime;
      98             : 
      99      399384 :   if (shard_a->data_allocated < shard_b->data_size) {
     100      372084 :     dbm_mempool_host_free(shard_a->data);
     101      744168 :     shard_a->data =
     102      372084 :         dbm_mempool_host_malloc(shard_b->data_size * sizeof(double));
     103      372084 :     shard_a->data_allocated = shard_b->data_size;
     104      372084 :     assert(shard_a->data != NULL);
     105             :   }
     106      399384 :   shard_a->data_size = shard_b->data_size;
     107             : 
     108      399384 :   if (shard_a->blocks != NULL) {
     109      372114 :     memcpy(shard_a->blocks, shard_b->blocks,
     110      372114 :            shard_b->nblocks * sizeof(dbm_block_t));
     111             :   }
     112      399384 :   if (shard_a->hashtable != NULL) {
     113      399384 :     memcpy(shard_a->hashtable, shard_b->hashtable,
     114      399384 :            shard_b->hashtable_size * sizeof(int));
     115             :   }
     116      399384 :   if (shard_a->data != NULL) {
     117      372114 :     memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double));
     118             :   }
     119      399384 : }
     120             : 
     121             : /*******************************************************************************
     122             :  * \brief Internal routine for releasing a shard.
     123             :  * \author Ole Schuett
     124             :  ******************************************************************************/
     125     1615183 : void dbm_shard_release(dbm_shard_t *shard) {
     126     1615183 :   free(shard->blocks);
     127     1615183 :   free(shard->hashtable);
     128     1615183 :   dbm_mempool_host_free(shard->data);
     129     1615183 :   omp_destroy_lock(&shard->lock);
     130     1615183 : }
     131             : 
     132             : /*******************************************************************************
     133             :  * \brief Private hash function based on Cantor pairing function.
     134             :  *        https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
     135             :  *        Szudzik's elegant pairing proved to be too asymmetric wrt. row / col.
     136             :  *        Using unsigned int to return a positive number even after overflow.
     137             :  * \author Ole Schuett
     138             :  ******************************************************************************/
     139   227683264 : static inline unsigned int hash(const unsigned int row,
     140             :                                 const unsigned int col) {
     141   227683264 :   return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap.
     142             : }
     143             : 
     144             : /*******************************************************************************
     145             :  * \brief Internal routine for masking a slot in the hash-table.
     146             :  * \author Hans Pabst
     147             :  ******************************************************************************/
     148   227683264 : static inline int hashtable_mask(const dbm_shard_t *shard) {
     149   227683264 :   return shard->hashtable_size - 1;
     150             : }
     151             : 
     152             : /*******************************************************************************
     153             :  * \brief Private routine for inserting a block into a shard's hashtable.
     154             :  * \author Ole Schuett
     155             :  ******************************************************************************/
     156   111824368 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) {
     157   111824368 :   assert(0 <= block_idx && block_idx < shard->nblocks);
     158   111824368 :   const dbm_block_t *blk = &shard->blocks[block_idx];
     159   111824368 :   const unsigned int h = hash(blk->row, blk->col);
     160   111824368 :   int slot = (shard->hashtable_prime * h) & hashtable_mask(shard);
     161   111865174 :   for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing
     162   117329739 :     for (int i = slot; i < shard->hashtable_size; ++i) {
     163   117309336 :       if (shard->hashtable[i] == 0) {        // 0 means empty
     164   111824368 :         shard->hashtable[i] = block_idx + 1; // 1-based
     165   111824368 :         return;
     166             :       }
     167             :     }
     168             :   }
     169             : }
     170             : 
     171             : /*******************************************************************************
     172             :  * \brief Internal routine for looking up a block from a shard.
     173             :  * \author Ole Schuett
     174             :  ******************************************************************************/
     175   115858896 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row,
     176             :                               const int col) {
     177   115858896 :   int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard);
     178       75556 :   for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing
     179   122061396 :     for (int i = slot; i < shard->hashtable_size; ++i) {
     180   121985840 :       const int block_idx = shard->hashtable[i];
     181   121985840 :       if (block_idx == 0) { // 1-based, 0 means empty
     182             :         return NULL;        // block not found
     183             :       }
     184    77690404 :       assert(0 < block_idx && block_idx <= shard->nblocks);
     185    77690404 :       dbm_block_t *blk = &shard->blocks[block_idx - 1];
     186    77690404 :       if (blk->row == row && blk->col == col) {
     187             :         return blk;
     188             :       }
     189             :     }
     190             :   }
     191             : }
     192             : 
     193             : /*******************************************************************************
     194             :  * \brief Internal routine for allocating the metadata of a new block.
     195             :  * \author Ole Schuett
     196             :  ******************************************************************************/
     197    52516614 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row,
     198             :                                          const int col, const int block_size) {
     199             :   // Grow blocks array if necessary.
     200    52516614 :   if (shard->nblocks_allocated < shard->nblocks + 1) {
     201     4989342 :     shard->nblocks_allocated = DBM_OVERCOMMIT_HOST * (shard->nblocks + 1);
     202     4989342 :     assert((shard->nblocks + 1) <= shard->nblocks_allocated);
     203     4989342 :     shard->blocks =
     204     4989342 :         realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t));
     205     4989342 :     assert(shard->blocks != NULL);
     206             : 
     207             :     // rebuild hashtable
     208     4989342 :     free(shard->hashtable);
     209     4989342 :     hashtable_init(shard);
     210    64297096 :     for (int i = 0; i < shard->nblocks; i++) {
     211    59307754 :       hashtable_insert(shard, i);
     212             :     }
     213             :   }
     214             : 
     215    52516614 :   const int new_block_idx = shard->nblocks;
     216    52516614 :   shard->nblocks++;
     217    52516614 :   dbm_block_t *new_block = &shard->blocks[new_block_idx];
     218    52516614 :   new_block->row = row;
     219    52516614 :   new_block->col = col;
     220    52516614 :   new_block->offset = shard->data_promised;
     221    52516614 :   shard->data_promised += block_size;
     222             :   // The data_size will be increased after the memory is allocated and zeroed.
     223    52516614 :   hashtable_insert(shard, new_block_idx);
     224    52516614 :   return new_block;
     225             : }
     226             : 
     227             : /*******************************************************************************
     228             :  * \brief Internal routine for allocating and zeroing any promised block's data.
     229             :  * \author Ole Schuett
     230             :  ******************************************************************************/
     231    12845676 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) {
     232             : 
     233             :   // Reallocate data array if necessary.
     234    12845676 :   if (shard->data_promised > shard->data_allocated) {
     235     1424251 :     const double *data = shard->data;
     236     1424251 :     shard->data_allocated = DBM_OVERCOMMIT_HOST * shard->data_promised;
     237     1424251 :     assert(shard->data_promised <= shard->data_allocated);
     238     2848502 :     shard->data =
     239     1424251 :         dbm_mempool_host_malloc(shard->data_allocated * sizeof(double));
     240     1424251 :     assert(shard->data != NULL);
     241     1424251 :     memcpy(shard->data, data, shard->data_size * sizeof(double));
     242     1424251 :     dbm_mempool_host_free(data);
     243             :   }
     244             : 
     245             :   // Zero new blocks.
     246             :   // The following memset is usually the first touch of the memory, which leads
     247             :   // to frequent page faults. The executing thread determines the NUMA location
     248    12845676 :   if (shard->data_promised > shard->data_size) {
     249    12548787 :     const int tail = shard->data_promised - shard->data_size;
     250    12548787 :     memset(shard->data + shard->data_size, 0, tail * sizeof(double));
     251    12548787 :     shard->data_size = shard->data_promised;
     252             :   }
     253    12845676 : }
     254             : 
     255             : /*******************************************************************************
     256             :  * \brief Internal routine for getting block or promising a new one.
     257             :  * \author Ole Schuett
     258             :  ******************************************************************************/
     259    29073099 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row,
     260             :                                             const int col,
     261             :                                             const int block_size) {
     262    29073099 :   dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
     263    29073099 :   if (existing_blk != NULL) {
     264             :     return existing_blk;
     265             :   } else {
     266    27106204 :     return dbm_shard_promise_new_block(shard, row, col, block_size);
     267             :   }
     268             : }
     269             : 
     270             : /*******************************************************************************
     271             :  * \brief Internal routine for getting block or allocating a new one.
     272             :  * \author Ole Schuett
     273             :  ******************************************************************************/
     274    40197700 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row,
     275             :                                              const int col,
     276             :                                              const int block_size) {
     277    40197700 :   dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
     278    40197700 :   if (existing_blk != NULL) {
     279             :     return existing_blk;
     280             :   }
     281             : 
     282             :   // Create a new block.
     283    11327517 :   dbm_block_t *new_blk =
     284    11327517 :       dbm_shard_promise_new_block(shard, row, col, block_size);
     285    11327517 :   dbm_shard_allocate_promised_blocks(shard);
     286             : 
     287    11327517 :   return new_blk;
     288             : }
     289             : 
     290             : // EOF

Generated by: LCOV version 1.15