Line data Source code
1 : /*----------------------------------------------------------------------------*/ 2 : /* CP2K: A general program to perform molecular dynamics simulations */ 3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */ 4 : /* */ 5 : /* SPDX-License-Identifier: BSD-3-Clause */ 6 : /*----------------------------------------------------------------------------*/ 7 : #include <assert.h> 8 : #include <omp.h> 9 : #include <stdbool.h> 10 : #include <stddef.h> 11 : #include <stdlib.h> 12 : #include <string.h> 13 : 14 : #include "dbm_hyperparams.h" 15 : #include "dbm_mempool.h" 16 : #include "dbm_shard.h" 17 : 18 : /******************************************************************************* 19 : * \brief Internal routine for finding a power of two greater than given number. 20 : * \author Ole Schuett 21 : ******************************************************************************/ 22 6863332 : static int next_power2(const int start) { 23 6863332 : int candidate = 2; 24 24667198 : while (candidate < start) { 25 17803866 : candidate *= 2; 26 : } 27 6863332 : return candidate; 28 : } 29 : 30 : /******************************************************************************* 31 : * \brief Internal routine for finding a prime greater equal than given number. 32 : * \author Ole Schuett 33 : ******************************************************************************/ 34 6863332 : static int next_prime(const int start) { 35 6863332 : int candidate = start, divisor = 0; 36 27050136 : while (divisor < candidate) { 37 476426453 : for (divisor = 2; divisor < candidate; divisor++) { 38 469563121 : if (candidate % divisor == 0) { 39 13323472 : candidate++; 40 13323472 : break; 41 : } 42 : } 43 : } 44 6863332 : return candidate; 45 : } 46 : 47 : /******************************************************************************* 48 : * \brief Internal routine for initializing a shard's hashtable. 49 : * \author Ole Schuett 50 : ******************************************************************************/ 51 6863332 : static void hashtable_init(dbm_shard_t *shard) { 52 : // Choosing size as power of two allows to replace modulo with bitwise AND. 53 13726664 : shard->hashtable_size = 54 6863332 : next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated); 55 6863332 : shard->hashtable_prime = next_prime(shard->hashtable_size); 56 6863332 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int)); 57 6863332 : assert(shard->hashtable != NULL); 58 6863332 : } 59 : 60 : /******************************************************************************* 61 : * \brief Internal routine for initializing a shard. 62 : * \author Ole Schuett 63 : ******************************************************************************/ 64 1684494 : void dbm_shard_init(dbm_shard_t *shard) { 65 1684494 : shard->nblocks = 0; 66 1684494 : shard->nblocks_allocated = 0; 67 1684494 : shard->blocks = NULL; 68 1684494 : hashtable_init(shard); 69 1684494 : shard->data_size = 0; 70 1684494 : shard->data_promised = 0; 71 1684494 : shard->data_allocated = 0; 72 1684494 : shard->data = NULL; 73 1684494 : omp_init_lock(&shard->lock); 74 1684494 : } 75 : 76 : /******************************************************************************* 77 : * \brief Internal routine for copying content of shard_b into shard_a. 78 : * \author Ole Schuett 79 : ******************************************************************************/ 80 421322 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) { 81 421322 : assert(shard_a != NULL && shard_b != NULL); 82 : 83 421322 : if (shard_a->nblocks_allocated < shard_b->nblocks) { 84 393717 : free(shard_a->blocks); 85 393717 : shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t)); 86 393717 : shard_a->nblocks_allocated = shard_b->nblocks; 87 393717 : assert(shard_a->blocks != NULL); 88 : } 89 421322 : shard_a->nblocks = shard_b->nblocks; 90 : 91 421322 : if (shard_a->hashtable_size < shard_b->hashtable_size) { 92 396542 : free(shard_a->hashtable); 93 396542 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int)); 94 396542 : assert(shard_a->hashtable != NULL); 95 : } 96 421322 : shard_a->hashtable_size = shard_b->hashtable_size; 97 421322 : shard_a->hashtable_prime = shard_b->hashtable_prime; 98 : 99 421322 : if (shard_a->data_allocated < shard_b->data_size) { 100 393717 : dbm_mempool_host_free(shard_a->data); 101 787434 : shard_a->data = 102 393717 : dbm_mempool_host_malloc(shard_b->data_size * sizeof(double)); 103 393717 : shard_a->data_allocated = shard_b->data_size; 104 393717 : assert(shard_a->data != NULL); 105 : } 106 421322 : shard_a->data_size = shard_b->data_size; 107 : 108 421322 : if (shard_b->nblocks != 0) { 109 393747 : assert(shard_a->blocks != NULL && shard_b->blocks != NULL); 110 393747 : memcpy(shard_a->blocks, shard_b->blocks, 111 393747 : shard_b->nblocks * sizeof(dbm_block_t)); 112 : } 113 421322 : if (shard_b->hashtable_size != 0) { 114 421322 : assert(shard_a->hashtable != NULL && shard_b->hashtable != NULL); 115 421322 : memcpy(shard_a->hashtable, shard_b->hashtable, 116 421322 : shard_b->hashtable_size * sizeof(int)); 117 : } 118 421322 : if (shard_b->data_size != 0) { 119 393747 : assert(shard_a->data != NULL && shard_b->data != NULL); 120 393747 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double)); 121 : } 122 421322 : } 123 : 124 : /******************************************************************************* 125 : * \brief Internal routine for releasing a shard. 126 : * \author Ole Schuett 127 : ******************************************************************************/ 128 1684494 : void dbm_shard_release(dbm_shard_t *shard) { 129 1684494 : free(shard->blocks); 130 1684494 : free(shard->hashtable); 131 1684494 : dbm_mempool_host_free(shard->data); 132 1684494 : omp_destroy_lock(&shard->lock); 133 1684494 : } 134 : 135 : /******************************************************************************* 136 : * \brief Private hash function based on Cantor pairing function. 137 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function 138 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col. 139 : * Using unsigned int to return a positive number even after overflow. 140 : * \author Ole Schuett 141 : ******************************************************************************/ 142 234617193 : static inline unsigned int hash(const unsigned int row, 143 : const unsigned int col) { 144 234617193 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap. 145 : } 146 : 147 : /******************************************************************************* 148 : * \brief Internal routine for masking a slot in the hash-table. 149 : * \author Hans Pabst 150 : ******************************************************************************/ 151 234617193 : static inline int hashtable_mask(const dbm_shard_t *shard) { 152 234617193 : return shard->hashtable_size - 1; 153 : } 154 : 155 : /******************************************************************************* 156 : * \brief Private routine for inserting a block into a shard's hashtable. 157 : * \author Ole Schuett 158 : ******************************************************************************/ 159 117531208 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) { 160 117531208 : assert(0 <= block_idx && block_idx < shard->nblocks); 161 117531208 : const dbm_block_t *blk = &shard->blocks[block_idx]; 162 117531208 : const unsigned int h = hash(blk->row, blk->col); 163 117531208 : int slot = (shard->hashtable_prime * h) & hashtable_mask(shard); 164 117573646 : for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing 165 123629255 : for (int i = slot; i < shard->hashtable_size; ++i) { 166 123608036 : if (shard->hashtable[i] == 0) { // 0 means empty 167 117531208 : shard->hashtable[i] = block_idx + 1; // 1-based 168 117531208 : return; 169 : } 170 : } 171 : } 172 : } 173 : 174 : /******************************************************************************* 175 : * \brief Internal routine for looking up a block from a shard. 176 : * \author Ole Schuett 177 : ******************************************************************************/ 178 117085985 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row, 179 : const int col) { 180 117085985 : int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard); 181 74097 : for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing 182 123554270 : for (int i = slot; i < shard->hashtable_size; ++i) { 183 123480173 : const int block_idx = shard->hashtable[i]; 184 123480173 : if (block_idx == 0) { // 1-based, 0 means empty 185 : return NULL; // block not found 186 : } 187 79928950 : assert(0 < block_idx && block_idx <= shard->nblocks); 188 79928950 : dbm_block_t *blk = &shard->blocks[block_idx - 1]; 189 79928950 : if (blk->row == row && blk->col == col) { 190 : return blk; 191 : } 192 : } 193 : } 194 : } 195 : 196 : /******************************************************************************* 197 : * \brief Internal routine for allocating the metadata of a new block. 198 : * \author Ole Schuett 199 : ******************************************************************************/ 200 53261368 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, 201 : const int col, const int block_size) { 202 : // Grow blocks array if necessary. 203 53261368 : if (shard->nblocks_allocated < shard->nblocks + 1) { 204 5178838 : shard->nblocks_allocated = DBM_OVERCOMMIT_HOST * (shard->nblocks + 1); 205 5178838 : assert((shard->nblocks + 1) <= shard->nblocks_allocated); 206 5178838 : shard->blocks = 207 5178838 : realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t)); 208 5178838 : assert(shard->blocks != NULL); 209 : 210 : // rebuild hashtable 211 5178838 : free(shard->hashtable); 212 5178838 : hashtable_init(shard); 213 69448678 : for (int i = 0; i < shard->nblocks; i++) { 214 64269840 : hashtable_insert(shard, i); 215 : } 216 : } 217 : 218 53261368 : const int new_block_idx = shard->nblocks; 219 53261368 : shard->nblocks++; 220 53261368 : dbm_block_t *new_block = &shard->blocks[new_block_idx]; 221 53261368 : new_block->row = row; 222 53261368 : new_block->col = col; 223 53261368 : new_block->offset = shard->data_promised; 224 53261368 : shard->data_promised += block_size; 225 : // The data_size will be increased after the memory is allocated and zeroed. 226 53261368 : hashtable_insert(shard, new_block_idx); 227 53261368 : return new_block; 228 : } 229 : 230 : /******************************************************************************* 231 : * \brief Internal routine for allocating and zeroing any promised block's data. 232 : * \author Ole Schuett 233 : ******************************************************************************/ 234 11523459 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) { 235 : 236 : // Reallocate data array if necessary. 237 11523459 : if (shard->data_promised > shard->data_allocated) { 238 1478301 : const double *data = shard->data; 239 1478301 : shard->data_allocated = DBM_OVERCOMMIT_HOST * shard->data_promised; 240 1478301 : assert(shard->data_promised <= shard->data_allocated); 241 2956602 : shard->data = 242 1478301 : dbm_mempool_host_malloc(shard->data_allocated * sizeof(double)); 243 1478301 : assert(shard->data != NULL); 244 1478301 : if (data != NULL) { 245 400764 : memcpy(shard->data, data, shard->data_size * sizeof(double)); 246 400764 : dbm_mempool_host_free(data); 247 : } 248 : } 249 : 250 : // Zero new blocks. 251 : // The following memset is usually the first touch of the memory, which leads 252 : // to frequent page faults. The executing thread determines the NUMA location 253 11523459 : if (shard->data_promised > shard->data_size) { 254 11198838 : const int tail = shard->data_promised - shard->data_size; 255 11198838 : memset(shard->data + shard->data_size, 0, tail * sizeof(double)); 256 11198838 : shard->data_size = shard->data_promised; 257 : } 258 11523459 : } 259 : 260 : /******************************************************************************* 261 : * \brief Internal routine for getting block or promising a new one. 262 : * \author Ole Schuett 263 : ******************************************************************************/ 264 29061202 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, 265 : const int col, 266 : const int block_size) { 267 29061202 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 268 29061202 : if (existing_blk != NULL) { 269 : return existing_blk; 270 : } else { 271 26837369 : return dbm_shard_promise_new_block(shard, row, col, block_size); 272 : } 273 : } 274 : 275 : /******************************************************************************* 276 : * \brief Internal routine for getting block or allocating a new one. 277 : * \author Ole Schuett 278 : ******************************************************************************/ 279 38650693 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, 280 : const int col, 281 : const int block_size) { 282 38650693 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 283 38650693 : if (existing_blk != NULL) { 284 : return existing_blk; 285 : } 286 : 287 : // Create a new block. 288 9905189 : dbm_block_t *new_blk = 289 9905189 : dbm_shard_promise_new_block(shard, row, col, block_size); 290 9905189 : dbm_shard_allocate_promised_blocks(shard); 291 : 292 9905189 : return new_blk; 293 : } 294 : 295 : // EOF