LCOV - code coverage report
Current view: top level - src/dbm - dbm_shard.c (source / functions) Hit Total Coverage
Test: CP2K Regtests (git:3158929) Lines: 140 140 100.0 %
Date: 2025-07-22 22:41:15 Functions: 12 12 100.0 %

          Line data    Source code
       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : #include <assert.h>
       8             : #include <omp.h>
       9             : #include <stdbool.h>
      10             : #include <stddef.h>
      11             : #include <stdlib.h>
      12             : #include <string.h>
      13             : 
      14             : #include "dbm_hyperparams.h"
      15             : #include "dbm_mempool.h"
      16             : #include "dbm_shard.h"
      17             : 
      18             : /*******************************************************************************
      19             :  * \brief Internal routine for finding a power of two greater than given number.
      20             :  * \author Ole Schuett
      21             :  ******************************************************************************/
      22     6863332 : static int next_power2(const int start) {
      23     6863332 :   int candidate = 2;
      24    24667198 :   while (candidate < start) {
      25    17803866 :     candidate *= 2;
      26             :   }
      27     6863332 :   return candidate;
      28             : }
      29             : 
      30             : /*******************************************************************************
      31             :  * \brief Internal routine for finding a prime greater equal than given number.
      32             :  * \author Ole Schuett
      33             :  ******************************************************************************/
      34     6863332 : static int next_prime(const int start) {
      35     6863332 :   int candidate = start, divisor = 0;
      36    27050136 :   while (divisor < candidate) {
      37   476426453 :     for (divisor = 2; divisor < candidate; divisor++) {
      38   469563121 :       if (candidate % divisor == 0) {
      39    13323472 :         candidate++;
      40    13323472 :         break;
      41             :       }
      42             :     }
      43             :   }
      44     6863332 :   return candidate;
      45             : }
      46             : 
      47             : /*******************************************************************************
      48             :  * \brief Internal routine for initializing a shard's hashtable.
      49             :  * \author Ole Schuett
      50             :  ******************************************************************************/
      51     6863332 : static void hashtable_init(dbm_shard_t *shard) {
      52             :   // Choosing size as power of two allows to replace modulo with bitwise AND.
      53    13726664 :   shard->hashtable_size =
      54     6863332 :       next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated);
      55     6863332 :   shard->hashtable_prime = next_prime(shard->hashtable_size);
      56     6863332 :   shard->hashtable = calloc(shard->hashtable_size, sizeof(int));
      57     6863332 :   assert(shard->hashtable != NULL);
      58     6863332 : }
      59             : 
      60             : /*******************************************************************************
      61             :  * \brief Internal routine for initializing a shard.
      62             :  * \author Ole Schuett
      63             :  ******************************************************************************/
      64     1684494 : void dbm_shard_init(dbm_shard_t *shard) {
      65     1684494 :   shard->nblocks = 0;
      66     1684494 :   shard->nblocks_allocated = 0;
      67     1684494 :   shard->blocks = NULL;
      68     1684494 :   hashtable_init(shard);
      69     1684494 :   shard->data_size = 0;
      70     1684494 :   shard->data_promised = 0;
      71     1684494 :   shard->data_allocated = 0;
      72     1684494 :   shard->data = NULL;
      73     1684494 :   omp_init_lock(&shard->lock);
      74     1684494 : }
      75             : 
      76             : /*******************************************************************************
      77             :  * \brief Internal routine for copying content of shard_b into shard_a.
      78             :  * \author Ole Schuett
      79             :  ******************************************************************************/
      80      421322 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) {
      81      421322 :   assert(shard_a != NULL && shard_b != NULL);
      82             : 
      83      421322 :   if (shard_a->nblocks_allocated < shard_b->nblocks) {
      84      393717 :     free(shard_a->blocks);
      85      393717 :     shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t));
      86      393717 :     shard_a->nblocks_allocated = shard_b->nblocks;
      87      393717 :     assert(shard_a->blocks != NULL);
      88             :   }
      89      421322 :   shard_a->nblocks = shard_b->nblocks;
      90             : 
      91      421322 :   if (shard_a->hashtable_size < shard_b->hashtable_size) {
      92      396542 :     free(shard_a->hashtable);
      93      396542 :     shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int));
      94      396542 :     assert(shard_a->hashtable != NULL);
      95             :   }
      96      421322 :   shard_a->hashtable_size = shard_b->hashtable_size;
      97      421322 :   shard_a->hashtable_prime = shard_b->hashtable_prime;
      98             : 
      99      421322 :   if (shard_a->data_allocated < shard_b->data_size) {
     100      393717 :     dbm_mempool_host_free(shard_a->data);
     101      787434 :     shard_a->data =
     102      393717 :         dbm_mempool_host_malloc(shard_b->data_size * sizeof(double));
     103      393717 :     shard_a->data_allocated = shard_b->data_size;
     104      393717 :     assert(shard_a->data != NULL);
     105             :   }
     106      421322 :   shard_a->data_size = shard_b->data_size;
     107             : 
     108      421322 :   if (shard_b->nblocks != 0) {
     109      393747 :     assert(shard_a->blocks != NULL && shard_b->blocks != NULL);
     110      393747 :     memcpy(shard_a->blocks, shard_b->blocks,
     111      393747 :            shard_b->nblocks * sizeof(dbm_block_t));
     112             :   }
     113      421322 :   if (shard_b->hashtable_size != 0) {
     114      421322 :     assert(shard_a->hashtable != NULL && shard_b->hashtable != NULL);
     115      421322 :     memcpy(shard_a->hashtable, shard_b->hashtable,
     116      421322 :            shard_b->hashtable_size * sizeof(int));
     117             :   }
     118      421322 :   if (shard_b->data_size != 0) {
     119      393747 :     assert(shard_a->data != NULL && shard_b->data != NULL);
     120      393747 :     memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double));
     121             :   }
     122      421322 : }
     123             : 
     124             : /*******************************************************************************
     125             :  * \brief Internal routine for releasing a shard.
     126             :  * \author Ole Schuett
     127             :  ******************************************************************************/
     128     1684494 : void dbm_shard_release(dbm_shard_t *shard) {
     129     1684494 :   free(shard->blocks);
     130     1684494 :   free(shard->hashtable);
     131     1684494 :   dbm_mempool_host_free(shard->data);
     132     1684494 :   omp_destroy_lock(&shard->lock);
     133     1684494 : }
     134             : 
     135             : /*******************************************************************************
     136             :  * \brief Private hash function based on Cantor pairing function.
     137             :  *        https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
     138             :  *        Szudzik's elegant pairing proved to be too asymmetric wrt. row / col.
     139             :  *        Using unsigned int to return a positive number even after overflow.
     140             :  * \author Ole Schuett
     141             :  ******************************************************************************/
     142   234617193 : static inline unsigned int hash(const unsigned int row,
     143             :                                 const unsigned int col) {
     144   234617193 :   return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap.
     145             : }
     146             : 
     147             : /*******************************************************************************
     148             :  * \brief Internal routine for masking a slot in the hash-table.
     149             :  * \author Hans Pabst
     150             :  ******************************************************************************/
     151   234617193 : static inline int hashtable_mask(const dbm_shard_t *shard) {
     152   234617193 :   return shard->hashtable_size - 1;
     153             : }
     154             : 
     155             : /*******************************************************************************
     156             :  * \brief Private routine for inserting a block into a shard's hashtable.
     157             :  * \author Ole Schuett
     158             :  ******************************************************************************/
     159   117531208 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) {
     160   117531208 :   assert(0 <= block_idx && block_idx < shard->nblocks);
     161   117531208 :   const dbm_block_t *blk = &shard->blocks[block_idx];
     162   117531208 :   const unsigned int h = hash(blk->row, blk->col);
     163   117531208 :   int slot = (shard->hashtable_prime * h) & hashtable_mask(shard);
     164   117573646 :   for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing
     165   123629255 :     for (int i = slot; i < shard->hashtable_size; ++i) {
     166   123608036 :       if (shard->hashtable[i] == 0) {        // 0 means empty
     167   117531208 :         shard->hashtable[i] = block_idx + 1; // 1-based
     168   117531208 :         return;
     169             :       }
     170             :     }
     171             :   }
     172             : }
     173             : 
     174             : /*******************************************************************************
     175             :  * \brief Internal routine for looking up a block from a shard.
     176             :  * \author Ole Schuett
     177             :  ******************************************************************************/
     178   117085985 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row,
     179             :                               const int col) {
     180   117085985 :   int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard);
     181       74097 :   for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing
     182   123554270 :     for (int i = slot; i < shard->hashtable_size; ++i) {
     183   123480173 :       const int block_idx = shard->hashtable[i];
     184   123480173 :       if (block_idx == 0) { // 1-based, 0 means empty
     185             :         return NULL;        // block not found
     186             :       }
     187    79928950 :       assert(0 < block_idx && block_idx <= shard->nblocks);
     188    79928950 :       dbm_block_t *blk = &shard->blocks[block_idx - 1];
     189    79928950 :       if (blk->row == row && blk->col == col) {
     190             :         return blk;
     191             :       }
     192             :     }
     193             :   }
     194             : }
     195             : 
     196             : /*******************************************************************************
     197             :  * \brief Internal routine for allocating the metadata of a new block.
     198             :  * \author Ole Schuett
     199             :  ******************************************************************************/
     200    53261368 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row,
     201             :                                          const int col, const int block_size) {
     202             :   // Grow blocks array if necessary.
     203    53261368 :   if (shard->nblocks_allocated < shard->nblocks + 1) {
     204     5178838 :     shard->nblocks_allocated = DBM_OVERCOMMIT_HOST * (shard->nblocks + 1);
     205     5178838 :     assert((shard->nblocks + 1) <= shard->nblocks_allocated);
     206     5178838 :     shard->blocks =
     207     5178838 :         realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t));
     208     5178838 :     assert(shard->blocks != NULL);
     209             : 
     210             :     // rebuild hashtable
     211     5178838 :     free(shard->hashtable);
     212     5178838 :     hashtable_init(shard);
     213    69448678 :     for (int i = 0; i < shard->nblocks; i++) {
     214    64269840 :       hashtable_insert(shard, i);
     215             :     }
     216             :   }
     217             : 
     218    53261368 :   const int new_block_idx = shard->nblocks;
     219    53261368 :   shard->nblocks++;
     220    53261368 :   dbm_block_t *new_block = &shard->blocks[new_block_idx];
     221    53261368 :   new_block->row = row;
     222    53261368 :   new_block->col = col;
     223    53261368 :   new_block->offset = shard->data_promised;
     224    53261368 :   shard->data_promised += block_size;
     225             :   // The data_size will be increased after the memory is allocated and zeroed.
     226    53261368 :   hashtable_insert(shard, new_block_idx);
     227    53261368 :   return new_block;
     228             : }
     229             : 
     230             : /*******************************************************************************
     231             :  * \brief Internal routine for allocating and zeroing any promised block's data.
     232             :  * \author Ole Schuett
     233             :  ******************************************************************************/
     234    11523459 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) {
     235             : 
     236             :   // Reallocate data array if necessary.
     237    11523459 :   if (shard->data_promised > shard->data_allocated) {
     238     1478301 :     const double *data = shard->data;
     239     1478301 :     shard->data_allocated = DBM_OVERCOMMIT_HOST * shard->data_promised;
     240     1478301 :     assert(shard->data_promised <= shard->data_allocated);
     241     2956602 :     shard->data =
     242     1478301 :         dbm_mempool_host_malloc(shard->data_allocated * sizeof(double));
     243     1478301 :     assert(shard->data != NULL);
     244     1478301 :     if (data != NULL) {
     245      400764 :       memcpy(shard->data, data, shard->data_size * sizeof(double));
     246      400764 :       dbm_mempool_host_free(data);
     247             :     }
     248             :   }
     249             : 
     250             :   // Zero new blocks.
     251             :   // The following memset is usually the first touch of the memory, which leads
     252             :   // to frequent page faults. The executing thread determines the NUMA location
     253    11523459 :   if (shard->data_promised > shard->data_size) {
     254    11198838 :     const int tail = shard->data_promised - shard->data_size;
     255    11198838 :     memset(shard->data + shard->data_size, 0, tail * sizeof(double));
     256    11198838 :     shard->data_size = shard->data_promised;
     257             :   }
     258    11523459 : }
     259             : 
     260             : /*******************************************************************************
     261             :  * \brief Internal routine for getting block or promising a new one.
     262             :  * \author Ole Schuett
     263             :  ******************************************************************************/
     264    29061202 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row,
     265             :                                             const int col,
     266             :                                             const int block_size) {
     267    29061202 :   dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
     268    29061202 :   if (existing_blk != NULL) {
     269             :     return existing_blk;
     270             :   } else {
     271    26837369 :     return dbm_shard_promise_new_block(shard, row, col, block_size);
     272             :   }
     273             : }
     274             : 
     275             : /*******************************************************************************
     276             :  * \brief Internal routine for getting block or allocating a new one.
     277             :  * \author Ole Schuett
     278             :  ******************************************************************************/
     279    38650693 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row,
     280             :                                              const int col,
     281             :                                              const int block_size) {
     282    38650693 :   dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
     283    38650693 :   if (existing_blk != NULL) {
     284             :     return existing_blk;
     285             :   }
     286             : 
     287             :   // Create a new block.
     288     9905189 :   dbm_block_t *new_blk =
     289     9905189 :       dbm_shard_promise_new_block(shard, row, col, block_size);
     290     9905189 :   dbm_shard_allocate_promised_blocks(shard);
     291             : 
     292     9905189 :   return new_blk;
     293             : }
     294             : 
     295             : // EOF

Generated by: LCOV version 1.15