Line data Source code
1 : /*----------------------------------------------------------------------------*/ 2 : /* CP2K: A general program to perform molecular dynamics simulations */ 3 : /* Copyright 2000-2024 CP2K developers group <https://cp2k.org> */ 4 : /* */ 5 : /* SPDX-License-Identifier: BSD-3-Clause */ 6 : /*----------------------------------------------------------------------------*/ 7 : #include <assert.h> 8 : #include <omp.h> 9 : #include <stdbool.h> 10 : #include <stddef.h> 11 : #include <stdlib.h> 12 : #include <string.h> 13 : 14 : #include "dbm_hyperparams.h" 15 : #include "dbm_shard.h" 16 : 17 : /******************************************************************************* 18 : * \brief Internal routine for finding a power of two greater than given number. 19 : * \author Ole Schuett 20 : ******************************************************************************/ 21 1176327 : static int next_power2(const int start) { 22 1176327 : int candidate = 2; 23 10665333 : while (candidate < start) { 24 9489006 : candidate *= 2; 25 : } 26 1176327 : return candidate; 27 : } 28 : 29 : /******************************************************************************* 30 : * \brief Internal routine for finding a prime greater equal than given number. 31 : * \author Ole Schuett 32 : ******************************************************************************/ 33 1176327 : static int next_prime(const int start) { 34 1176327 : int candidate = start, divisor = 0; 35 12805695 : while (divisor < candidate) { 36 717405918 : for (divisor = 2; divisor < candidate; divisor++) { 37 716229591 : if (candidate % divisor == 0) { 38 10453041 : candidate++; 39 10453041 : break; 40 : } 41 : } 42 : } 43 1176327 : return candidate; 44 : } 45 : 46 : /******************************************************************************* 47 : * \brief Internal routine for initializing a shard's hashtable. 48 : * \author Ole Schuett 49 : ******************************************************************************/ 50 1176327 : static void hashtable_init(dbm_shard_t *shard) { 51 : // Choosing size as power of two allows to replace modulo with bitwise AND. 52 2352654 : shard->hashtable_size = 53 1176327 : next_power2(HASHTABLE_FACTOR * shard->nblocks_allocated); 54 1176327 : shard->hashtable_mask = shard->hashtable_size - 1; 55 1176327 : shard->hashtable_prime = next_prime(shard->hashtable_size); 56 1176327 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int)); 57 1176327 : } 58 : 59 : /******************************************************************************* 60 : * \brief Internal routine for initializing a shard. 61 : * \author Ole Schuett 62 : ******************************************************************************/ 63 1097007 : void dbm_shard_init(dbm_shard_t *shard) { 64 1097007 : shard->nblocks = 0; 65 1097007 : shard->nblocks_allocated = INITIAL_NBLOCKS_ALLOCATED; 66 1097007 : shard->blocks = malloc(shard->nblocks_allocated * sizeof(dbm_block_t)); 67 1097007 : hashtable_init(shard); 68 1097007 : shard->data_size = 0; 69 1097007 : shard->data_promised = 0; 70 1097007 : shard->data_allocated = INITIAL_DATA_ALLOCATED; 71 1097007 : shard->data = malloc(shard->data_allocated * sizeof(double)); 72 : 73 1097007 : omp_init_lock(&shard->lock); 74 1097007 : } 75 : 76 : /******************************************************************************* 77 : * \brief Internal routine for copying content of shard_b into shard_a. 78 : * \author Ole Schuett 79 : ******************************************************************************/ 80 256044 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) { 81 256044 : free(shard_a->blocks); 82 256044 : shard_a->nblocks = shard_b->nblocks; 83 256044 : shard_a->nblocks_allocated = shard_b->nblocks_allocated; 84 256044 : shard_a->blocks = malloc(shard_b->nblocks_allocated * sizeof(dbm_block_t)); 85 256044 : memcpy(shard_a->blocks, shard_b->blocks, 86 256044 : shard_b->nblocks * sizeof(dbm_block_t)); 87 : 88 256044 : free(shard_a->hashtable); 89 256044 : shard_a->hashtable_size = shard_b->hashtable_size; 90 256044 : shard_a->hashtable_mask = shard_b->hashtable_mask; 91 256044 : shard_a->hashtable_prime = shard_b->hashtable_prime; 92 256044 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int)); 93 256044 : memcpy(shard_a->hashtable, shard_b->hashtable, 94 256044 : shard_b->hashtable_size * sizeof(int)); 95 : 96 256044 : free(shard_a->data); 97 256044 : shard_a->data_allocated = shard_b->data_allocated; 98 256044 : shard_a->data = malloc(shard_b->data_allocated * sizeof(double)); 99 256044 : shard_a->data_size = shard_b->data_size; 100 256044 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double)); 101 256044 : } 102 : 103 : /******************************************************************************* 104 : * \brief Internal routine for releasing a shard. 105 : * \author Ole Schuett 106 : ******************************************************************************/ 107 1097007 : void dbm_shard_release(dbm_shard_t *shard) { 108 1097007 : free(shard->blocks); 109 1097007 : free(shard->hashtable); 110 1097007 : free(shard->data); 111 1097007 : omp_destroy_lock(&shard->lock); 112 1097007 : } 113 : 114 : /******************************************************************************* 115 : * \brief Private hash function based on Cantor pairing function. 116 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function 117 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col. 118 : * Using unsigned int to return a positive number even after overflow. 119 : * \author Ole Schuett 120 : ******************************************************************************/ 121 153494600 : static inline unsigned int hash(const unsigned int row, 122 : const unsigned int col) { 123 153494600 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap. 124 : } 125 : 126 : /******************************************************************************* 127 : * \brief Private routine for inserting a block into a shard's hashtable. 128 : * \author Ole Schuett 129 : ******************************************************************************/ 130 60439354 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) { 131 60439354 : assert(0 <= block_idx && block_idx < shard->nblocks); 132 60439354 : const dbm_block_t *blk = &shard->blocks[block_idx]; 133 60439354 : const int row = blk->row, col = blk->col; 134 60439354 : int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask; 135 65905574 : while (true) { 136 63172464 : if (shard->hashtable[slot] == 0) { 137 60439354 : shard->hashtable[slot] = block_idx + 1; // 1-based because 0 means empty 138 60439354 : return; 139 : } 140 : // linear probing 141 2733110 : slot = (slot + 1) & shard->hashtable_mask; 142 : } 143 : } 144 : 145 : /******************************************************************************* 146 : * \brief Internal routine for looking up a block from a shard. 147 : * \author Ole Schuett 148 : ******************************************************************************/ 149 93055246 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row, 150 : const int col) { 151 93055246 : int slot = (shard->hashtable_prime * hash(row, col)) & shard->hashtable_mask; 152 99473134 : while (true) { 153 96264190 : const int block_idx = shard->hashtable[slot] - 1; // 1-based, 0 means empty. 154 96264190 : if (block_idx < 0) { 155 : return NULL; // block not found 156 : } 157 60976275 : assert(0 <= block_idx && block_idx < shard->nblocks); 158 60976275 : dbm_block_t *blk = &shard->blocks[block_idx]; 159 60976275 : if (blk->row == row && blk->col == col) { 160 : return blk; 161 : } 162 : // linear probing 163 3208944 : slot = (slot + 1) & shard->hashtable_mask; 164 : } 165 : } 166 : 167 : /******************************************************************************* 168 : * \brief Internal routine for allocating the metadata of a new block. 169 : * \author Ole Schuett 170 : ******************************************************************************/ 171 42461888 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row, 172 : const int col, const int block_size) { 173 : // Grow blocks array if necessary. 174 42461888 : if (shard->nblocks_allocated < shard->nblocks + 1) { 175 79320 : shard->nblocks_allocated = ALLOCATION_FACTOR * (shard->nblocks + 1); 176 79320 : shard->blocks = 177 79320 : realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t)); 178 : 179 : // rebuild hashtable 180 79320 : free(shard->hashtable); 181 79320 : hashtable_init(shard); 182 18056786 : for (int i = 0; i < shard->nblocks; i++) { 183 17977466 : hashtable_insert(shard, i); 184 : } 185 : } 186 : 187 42461888 : const int new_block_idx = shard->nblocks; 188 42461888 : shard->nblocks++; 189 42461888 : dbm_block_t *new_block = &shard->blocks[new_block_idx]; 190 42461888 : new_block->row = row; 191 42461888 : new_block->col = col; 192 42461888 : new_block->offset = shard->data_promised; 193 42461888 : shard->data_promised += block_size; 194 : // The data_size will be increase after the memory is allocated and zeroed. 195 42461888 : hashtable_insert(shard, new_block_idx); 196 42461888 : return new_block; 197 : } 198 : 199 : /******************************************************************************* 200 : * \brief Internal routine for allocating and zeroing any promised block's data. 201 : * \author Ole Schuett 202 : ******************************************************************************/ 203 9518406 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) { 204 : 205 : // Reallocate data array if necessary. 206 9518406 : if (shard->data_promised > shard->data_allocated) { 207 355200 : shard->data_allocated = ALLOCATION_FACTOR * shard->data_promised; 208 355200 : shard->data = realloc(shard->data, shard->data_allocated * sizeof(double)); 209 : } 210 : 211 : // Zero new blocks. 212 : // The following memset is usually the first touch of the memory, which leads 213 : // to frequent page faults. The executing thread determines the NUMA location 214 9518406 : if (shard->data_promised > shard->data_size) { 215 9264482 : const int tail = shard->data_promised - shard->data_size; 216 9264482 : memset(&shard->data[shard->data_size], 0, tail * sizeof(double)); 217 9264482 : shard->data_size = shard->data_promised; 218 : } 219 9518406 : } 220 : 221 : /******************************************************************************* 222 : * \brief Internal routine for getting block or promising a new one. 223 : * \author Ole Schuett 224 : ******************************************************************************/ 225 23282409 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row, 226 : const int col, 227 : const int block_size) { 228 23282409 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 229 23282409 : if (existing_blk != NULL) { 230 : return existing_blk; 231 : } else { 232 21308601 : return dbm_shard_promise_new_block(shard, row, col, block_size); 233 : } 234 : } 235 : 236 : /******************************************************************************* 237 : * \brief Internal routine for getting block or allocating a new one. 238 : * \author Ole Schuett 239 : ******************************************************************************/ 240 31389338 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row, 241 : const int col, 242 : const int block_size) { 243 31389338 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col); 244 31389338 : if (existing_blk != NULL) { 245 : return existing_blk; 246 : } 247 : 248 : // Create a new block. 249 8362254 : dbm_block_t *new_blk = 250 8362254 : dbm_shard_promise_new_block(shard, row, col, block_size); 251 8362254 : dbm_shard_allocate_promised_blocks(shard); 252 : 253 8362254 : return new_blk; 254 : } 255 : 256 : // EOF