Line data Source code
1 : /*----------------------------------------------------------------------------*/ 2 : /* CP2K: A general program to perform molecular dynamics simulations */ 3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */ 4 : /* */ 5 : /* SPDX-License-Identifier: BSD-3-Clause */ 6 : /*----------------------------------------------------------------------------*/ 7 : #include <assert.h> 8 : #include <omp.h> 9 : #include <stdbool.h> 10 : #include <stddef.h> 11 : #include <stdlib.h> 12 : #include <string.h> 13 : 14 : #include "dbm_hyperparams.h" 15 : #include "dbm_mempool.h" 16 : #include "dbm_shard.h" 17 : 18 : /******************************************************************************* 19 : * \brief Internal routine for finding a power of two greater than given number. 20 : * \author Ole Schuett 21 : ******************************************************************************/ 22 6604525 : static int next_power2(const int start) { 23 6604525 : int candidate = 2; 24 23684473 : while (candidate < start) { 25 17079948 : candidate *= 2; 26 : } 27 6604525 : return candidate; 28 : } 29 : 30 : /******************************************************************************* 31 : * \brief Internal routine for finding a prime greater equal than given number. 32 : * \author Ole Schuett 33 : ******************************************************************************/ 34 6604525 : static int next_prime(const int start) { 35 6604525 : int candidate = start, divisor = 0; 36 25974886 : while (divisor < candidate) { 37 440490010 : for (divisor = 2; divisor < candidate; divisor++) { 38 433885485 : if (candidate % divisor == 0) { 39 12765836 : candidate++; 40 12765836 : break; 41 : } 42 : } 43 : } 44 6604525 : return candidate; 45 : } 46 : 47 : /******************************************************************************* 48 : * \brief Internal routine for initializing a shard's hashtable. 49 : * \author Ole Schuett 50 : ******************************************************************************/ 51 6604525 : static void hashtable_init(dbm_shard_t *shard) { 52 : // Choosing size as power of two allows to replace modulo with bitwise AND. 53 13209050 : shard->hashtable_size = 54 6604525 : next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated); 55 6604525 : shard->hashtable_prime = next_prime(shard->hashtable_size); 56 6604525 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int)); 57 6604525 : assert(shard->hashtable != NULL); 58 6604525 : } 59 : 60 : /******************************************************************************* 61 : * \brief Internal routine for initializing a shard. 62 : * \author Ole Schuett 63 : ******************************************************************************/ 64 1615183 : void dbm_shard_init(dbm_shard_t *shard) { 65 1615183 : shard->nblocks = 0; 66 1615183 : shard->nblocks_allocated = 0; 67 1615183 : shard->blocks = NULL; 68 1615183 : hashtable_init(shard); 69 1615183 : shard->data_size = 0; 70 1615183 : shard->data_promised = 0; 71 1615183 : shard->data_allocated = 0; 72 1615183 : shard->data = NULL; 73 1615183 : omp_init_lock(&shard->lock); 74 1615183 : } 75 : 76 : /******************************************************************************* 77 : * \brief Internal routine for copying content of shard_b into shard_a. 78 : * \author Ole Schuett 79 : ******************************************************************************/ 80 399384 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) { 81 399384 : assert(shard_a != NULL && shard_b != NULL); 82 : 83 399384 : if (shard_a->nblocks_allocated < shard_b->nblocks) { 84 372084 : free(shard_a->blocks); 85 372084 : shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t)); 86 372084 : shard_a->nblocks_allocated = shard_b->nblocks; 87 372084 : assert(shard_a->blocks != NULL); 88 : } 89 399384 : shard_a->nblocks = shard_b->nblocks; 90 : 91 399384 : if (shard_a->hashtable_size < shard_b->hashtable_size) { 92 374887 : free(shard_a->hashtable); 93 374887 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int)); 94 374887 : assert(shard_a->hashtable != NULL); 95 : } 96 399384 : shard_a->hashtable_size = shard_b->hashtable_size; 97 399384 : shard_a->hashtable_prime = shard_b->hashtable_prime; 98 : 99 399384 : if (shard_a->data_allocated < shard_b->data_size) { 100 372084 : dbm_mempool_host_free(shard_a->data); 101 744168 : shard_a->data = 102 372084 : dbm_mempool_host_malloc(shard_b->data_size * sizeof(double)); 103 372084 : shard_a->data_allocated = shard_b->data_size; 104 372084 : assert(shard_a->data != NULL); 105 : } 106 399384 : shard_a->data_size = shard_b->data_size; 107 : 108 399384 : if (shard_a->blocks != NULL) { 109 372114 : memcpy(shard_a->blocks, shard_b->blocks, 110 372114 : shard_b->nblocks * sizeof(dbm_block_t)); 111 : } 112 399384 : if (shard_a->hashtable != NULL) { 113 399384 : memcpy(shard_a->hashtable, shard_b->hashtable, 114 399384 : shard_b->hashtable_size * sizeof(int)); 115 : } 116 399384 : if (shard_a->data != NULL) { 117 372114 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double)); 118 : } 119 399384 : } 120 : 121 : /******************************************************************************* 122 : * \brief Internal routine for releasing a shard. 123 : * \author Ole Schuett 124 : ******************************************************************************/ 125 1615183 : void dbm_shard_release(dbm_shard_t *shard) { 126 1615183 : free(shard->blocks); 127 1615183 : free(shard->hashtable); 128 1615183 : dbm_mempool_host_free(shard->data); 129 1615183 : omp_destroy_lock(&shard->lock); 130 1615183 : } 131 : 132 : /******************************************************************************* 133 : * \brief Private hash function based on Cantor pairing function. 134 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function 135 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col. 136 : * Using unsigned int to return a positive number even after overflow. 137 : * \author Ole Schuett 138 : ******************************************************************************/ 139 227683264 : static inline unsigned int hash(const unsigned int row, 140 : const unsigned int col) { 141 227683264 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap. 142 : } 143 : 144 : /******************************************************************************* 145 : * \brief Internal routine for masking a slot in the hash-table. 146 : * \author Hans Pabst 147 : ******************************************************************************/ 148 227683264 : static inline int hashtable_mask(const dbm_shard_t *shard) { 149 227683264 : return shard->hashtable_size - 1; 150 : } 151 : 152 : /******************************************************************************* 153 : * \brief Private routine for inserting a block into a shard's hashtable. 154 : * \author Ole Schuett 155 : ******************************************************************************/ 156 111824368 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) { 157 111824368 : assert(0 <= block_idx && block_idx < shard->nblocks); 158 111824368 : const dbm_block_t *blk = &shard->blocks[block_idx]; 159 111824368 : const unsigned int h = hash(blk->row, blk->col); 160 111824368 : int slot = (shard->hashtable_prime * h) & hashtable_mask(shard); 161 111865174 : for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing 162 117329739 : for (int i = slot; i < shard->hashtable_size; ++i) { 163 117309336 : if (shard->hashtable[i] == 0) { // 0 means empty 164 111824368 : shard->hashtable[i] = block_idx + 1; // 1-based 165 111824368 : return; 166 : } 167 : } 168 : } 169 : } 170 : 171 : /******************************************************************************* 172 : * \brief Internal routine for looking up a block from a shard. 173 : * \author Ole Schuett 174 : ******************************************************************************/ 175 115858896 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row, 176 : const int col) { 177 115858896 : int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard); 178 75556 : for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing 179 122061396 : for (int i = slot; i < shard->hashtable_size; ++i) { 180 121985840 : const int block_idx = shard->hashtable[i]; 181 121985840 : if (block_idx == 0) { // 1-based, 0 means empty 182 : return NULL; // block not found 183 : } 184 77690404 : assert(0 < block_idx && block_idx <= shard->nblocks); 185 77690404 : dbm_block_t *blk = &shard->blocks[block_idx - 1]; 186 77690404 : if (blk->row == row && blk->col == col) { 187 : return blk; 188 : } 189 : } 190 : } 191 : } 192 : 193 : /******************************************************************************* 194 : * \brief Internal routine for allocating the metadata of a new block. 195 : * \author Ole Schuett 196 : ******************************************************************************/ 197 52516614 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, 198 : const int col, const int block_size) { 199 : // Grow blocks array if necessary. 200 52516614 : if (shard->nblocks_allocated < shard->nblocks + 1) { 201 4989342 : shard->nblocks_allocated = DBM_OVERCOMMIT_HOST * (shard->nblocks + 1); 202 4989342 : assert((shard->nblocks + 1) <= shard->nblocks_allocated); 203 4989342 : shard->blocks = 204 4989342 : realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t)); 205 4989342 : assert(shard->blocks != NULL); 206 : 207 : // rebuild hashtable 208 4989342 : free(shard->hashtable); 209 4989342 : hashtable_init(shard); 210 64297096 : for (int i = 0; i < shard->nblocks; i++) { 211 59307754 : hashtable_insert(shard, i); 212 : } 213 : } 214 : 215 52516614 : const int new_block_idx = shard->nblocks; 216 52516614 : shard->nblocks++; 217 52516614 : dbm_block_t *new_block = &shard->blocks[new_block_idx]; 218 52516614 : new_block->row = row; 219 52516614 : new_block->col = col; 220 52516614 : new_block->offset = shard->data_promised; 221 52516614 : shard->data_promised += block_size; 222 : // The data_size will be increased after the memory is allocated and zeroed. 223 52516614 : hashtable_insert(shard, new_block_idx); 224 52516614 : return new_block; 225 : } 226 : 227 : /******************************************************************************* 228 : * \brief Internal routine for allocating and zeroing any promised block's data. 229 : * \author Ole Schuett 230 : ******************************************************************************/ 231 12845676 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) { 232 : 233 : // Reallocate data array if necessary. 234 12845676 : if (shard->data_promised > shard->data_allocated) { 235 1424251 : const double *data = shard->data; 236 1424251 : shard->data_allocated = DBM_OVERCOMMIT_HOST * shard->data_promised; 237 1424251 : assert(shard->data_promised <= shard->data_allocated); 238 2848502 : shard->data = 239 1424251 : dbm_mempool_host_malloc(shard->data_allocated * sizeof(double)); 240 1424251 : assert(shard->data != NULL); 241 1424251 : memcpy(shard->data, data, shard->data_size * sizeof(double)); 242 1424251 : dbm_mempool_host_free(data); 243 : } 244 : 245 : // Zero new blocks. 246 : // The following memset is usually the first touch of the memory, which leads 247 : // to frequent page faults. The executing thread determines the NUMA location 248 12845676 : if (shard->data_promised > shard->data_size) { 249 12548787 : const int tail = shard->data_promised - shard->data_size; 250 12548787 : memset(shard->data + shard->data_size, 0, tail * sizeof(double)); 251 12548787 : shard->data_size = shard->data_promised; 252 : } 253 12845676 : } 254 : 255 : /******************************************************************************* 256 : * \brief Internal routine for getting block or promising a new one. 257 : * \author Ole Schuett 258 : ******************************************************************************/ 259 29073099 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, 260 : const int col, 261 : const int block_size) { 262 29073099 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 263 29073099 : if (existing_blk != NULL) { 264 : return existing_blk; 265 : } else { 266 27106204 : return dbm_shard_promise_new_block(shard, row, col, block_size); 267 : } 268 : } 269 : 270 : /******************************************************************************* 271 : * \brief Internal routine for getting block or allocating a new one. 272 : * \author Ole Schuett 273 : ******************************************************************************/ 274 40197700 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, 275 : const int col, 276 : const int block_size) { 277 40197700 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 278 40197700 : if (existing_blk != NULL) { 279 : return existing_blk; 280 : } 281 : 282 : // Create a new block. 283 11327517 : dbm_block_t *new_blk = 284 11327517 : dbm_shard_promise_new_block(shard, row, col, block_size); 285 11327517 : dbm_shard_allocate_promised_blocks(shard); 286 : 287 11327517 : return new_blk; 288 : } 289 : 290 : // EOF