Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 : #include "dbm_shard.h"
8 : #include "../offload/offload_mempool.h"
9 : #include "dbm_hyperparams.h"
10 :
11 : #include <assert.h>
12 : #include <omp.h>
13 : #include <stdbool.h>
14 : #include <stddef.h>
15 : #include <stdlib.h>
16 : #include <string.h>
17 :
18 : /*******************************************************************************
19 : * \brief Internal routine for finding a power of two greater than given number.
20 : * \author Ole Schuett
21 : ******************************************************************************/
22 8058595 : static int next_power2(const int start) {
23 8058595 : int candidate = 2;
24 29465312 : while (candidate < start) {
25 21406717 : candidate *= 2;
26 : }
27 8058595 : return candidate;
28 : }
29 :
30 : /*******************************************************************************
31 : * \brief Internal routine for finding a prime greater equal than given number.
32 : * \author Ole Schuett
33 : ******************************************************************************/
34 8058595 : static int next_prime(const int start) {
35 8058595 : int candidate = start, divisor = 0;
36 31893503 : while (divisor < candidate) {
37 509171302 : for (divisor = 2; divisor < candidate; divisor++) {
38 501112707 : if (candidate % divisor == 0) {
39 15776313 : candidate++;
40 15776313 : break;
41 : }
42 : }
43 : }
44 8058595 : return candidate;
45 : }
46 :
47 : /*******************************************************************************
48 : * \brief Internal routine for initializing a shard's hashtable.
49 : * \author Ole Schuett
50 : ******************************************************************************/
51 8058595 : static void hashtable_init(dbm_shard_t *shard) {
52 : // Choosing size as power of two allows to replace modulo with bitwise AND.
53 16117190 : shard->hashtable_size =
54 8058595 : next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated);
55 8058595 : shard->hashtable_prime = next_prime(shard->hashtable_size);
56 8058595 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int));
57 8058595 : assert(shard->hashtable != NULL);
58 8058595 : }
59 :
60 : /*******************************************************************************
61 : * \brief Internal routine for initializing a shard.
62 : * \author Ole Schuett
63 : ******************************************************************************/
64 1717896 : void dbm_shard_init(dbm_shard_t *shard) {
65 1717896 : shard->nblocks = 0;
66 1717896 : shard->nblocks_allocated = 0;
67 1717896 : shard->blocks = NULL;
68 1717896 : hashtable_init(shard);
69 1717896 : shard->data_size = 0;
70 1717896 : shard->data_promised = 0;
71 1717896 : shard->data_allocated = 0;
72 1717896 : shard->data = NULL;
73 1717896 : omp_init_lock(&shard->lock);
74 1717896 : }
75 :
76 : /*******************************************************************************
77 : * \brief Internal routine for copying content of shard_b into shard_a.
78 : * \author Ole Schuett
79 : ******************************************************************************/
80 431186 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) {
81 431186 : assert(shard_a != NULL && shard_b != NULL);
82 :
83 431186 : if (shard_a->nblocks_allocated < shard_b->nblocks) {
84 403419 : free(shard_a->blocks);
85 403419 : shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t));
86 403419 : shard_a->nblocks_allocated = shard_b->nblocks;
87 403419 : assert(shard_a->blocks != NULL);
88 : }
89 431186 : shard_a->nblocks = shard_b->nblocks;
90 :
91 431186 : if (shard_a->hashtable_size < shard_b->hashtable_size) {
92 406244 : free(shard_a->hashtable);
93 406244 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int));
94 406244 : assert(shard_a->hashtable != NULL);
95 : }
96 431186 : shard_a->hashtable_size = shard_b->hashtable_size;
97 431186 : shard_a->hashtable_prime = shard_b->hashtable_prime;
98 :
99 431186 : if (shard_a->data_allocated < shard_b->data_size) {
100 403419 : offload_mempool_host_free(shard_a->data);
101 806838 : shard_a->data =
102 403419 : offload_mempool_host_malloc(shard_b->data_size * sizeof(double));
103 403419 : shard_a->data_allocated = shard_b->data_size;
104 403419 : assert(shard_a->data != NULL);
105 : }
106 431186 : shard_a->data_size = shard_b->data_size;
107 :
108 431186 : if (shard_b->nblocks != 0) {
109 403449 : assert(shard_a->blocks != NULL && shard_b->blocks != NULL);
110 403449 : memcpy(shard_a->blocks, shard_b->blocks,
111 403449 : shard_b->nblocks * sizeof(dbm_block_t));
112 : }
113 431186 : if (shard_b->hashtable_size != 0) {
114 431186 : assert(shard_a->hashtable != NULL && shard_b->hashtable != NULL);
115 431186 : memcpy(shard_a->hashtable, shard_b->hashtable,
116 431186 : shard_b->hashtable_size * sizeof(int));
117 : }
118 431186 : if (shard_b->data_size != 0) {
119 403449 : assert(shard_a->data != NULL && shard_b->data != NULL);
120 403449 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double));
121 : }
122 431186 : }
123 :
124 : /*******************************************************************************
125 : * \brief Internal routine for releasing a shard.
126 : * \author Ole Schuett
127 : ******************************************************************************/
128 1717896 : void dbm_shard_release(dbm_shard_t *shard) {
129 1717896 : free(shard->blocks);
130 1717896 : free(shard->hashtable);
131 1717896 : offload_mempool_host_free(shard->data);
132 1717896 : omp_destroy_lock(&shard->lock);
133 1717896 : }
134 :
135 : /*******************************************************************************
136 : * \brief Private hash function based on Cantor pairing function.
137 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
138 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col.
139 : * Using unsigned int to return a positive number even after overflow.
140 : * \author Ole Schuett
141 : ******************************************************************************/
142 251856908 : static inline unsigned int hash(const unsigned int row,
143 : const unsigned int col) {
144 251856908 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap.
145 : }
146 :
147 : /*******************************************************************************
148 : * \brief Internal routine for masking a slot in the hash-table.
149 : * \author Hans Pabst
150 : ******************************************************************************/
151 251856908 : static inline int hashtable_mask(const dbm_shard_t *shard) {
152 251856908 : return shard->hashtable_size - 1;
153 : }
154 :
155 : /*******************************************************************************
156 : * \brief Private routine for inserting a block into a shard's hashtable.
157 : * \author Ole Schuett
158 : ******************************************************************************/
159 134886545 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) {
160 134886545 : assert(0 <= block_idx && block_idx < shard->nblocks);
161 134886545 : const dbm_block_t *blk = &shard->blocks[block_idx];
162 134886545 : const unsigned int h = hash(blk->row, blk->col);
163 134886545 : int slot = (shard->hashtable_prime * h) & hashtable_mask(shard);
164 134976857 : for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing
165 143877692 : for (int i = slot; i < shard->hashtable_size; ++i) {
166 143832536 : if (shard->hashtable[i] == 0) { // 0 means empty
167 134886545 : shard->hashtable[i] = block_idx + 1; // 1-based
168 134886545 : return;
169 : }
170 : }
171 : }
172 : }
173 :
174 : /*******************************************************************************
175 : * \brief Internal routine for looking up a block from a shard.
176 : * \author Ole Schuett
177 : ******************************************************************************/
178 116970363 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row,
179 : const int col) {
180 116970363 : int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard);
181 96317 : for (;; slot = (slot + 1) & hashtable_mask(shard)) { // linear probing
182 125378984 : for (int i = slot; i < shard->hashtable_size; ++i) {
183 125282667 : const int block_idx = shard->hashtable[i];
184 125282667 : if (block_idx == 0) { // 1-based, 0 means empty
185 : return NULL; // block not found
186 : }
187 81808998 : assert(0 < block_idx && block_idx <= shard->nblocks);
188 81808998 : dbm_block_t *blk = &shard->blocks[block_idx - 1];
189 81808998 : if (blk->row == row && blk->col == col) {
190 : return blk;
191 : }
192 : }
193 : }
194 : }
195 :
196 : /*******************************************************************************
197 : * \brief Internal routine for allocating the metadata of a new block.
198 : * \author Ole Schuett
199 : ******************************************************************************/
200 53193603 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row,
201 : const int col, const int block_size) {
202 : // Grow blocks array if necessary.
203 53193603 : if (shard->nblocks_allocated < shard->nblocks + 1) {
204 6340699 : shard->nblocks_allocated = DBM_ALLOCATION_FACTOR * (shard->nblocks + 1);
205 6340699 : assert((shard->nblocks + 1) <= shard->nblocks_allocated);
206 6340699 : shard->blocks =
207 6340699 : realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t));
208 6340699 : assert(shard->blocks != NULL);
209 :
210 : // rebuild hashtable
211 6340699 : free(shard->hashtable);
212 6340699 : hashtable_init(shard);
213 88033641 : for (int i = 0; i < shard->nblocks; i++) {
214 81692942 : hashtable_insert(shard, i);
215 : }
216 : }
217 :
218 53193603 : const int new_block_idx = shard->nblocks;
219 53193603 : shard->nblocks++;
220 53193603 : dbm_block_t *new_block = &shard->blocks[new_block_idx];
221 53193603 : new_block->row = row;
222 53193603 : new_block->col = col;
223 53193603 : new_block->offset = shard->data_promised;
224 53193603 : shard->data_promised += block_size;
225 : // The data_size will be increased after the memory is allocated and zeroed.
226 53193603 : hashtable_insert(shard, new_block_idx);
227 53193603 : return new_block;
228 : }
229 :
230 : /*******************************************************************************
231 : * \brief Internal routine for allocating and zeroing any promised block's data.
232 : * \author Ole Schuett
233 : ******************************************************************************/
234 11571955 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) {
235 :
236 : // Reallocate data array if necessary.
237 11571955 : if (shard->data_promised > shard->data_allocated) {
238 1621331 : const double *data = shard->data;
239 1621331 : shard->data_allocated = DBM_ALLOCATION_FACTOR * shard->data_promised;
240 1621331 : assert(shard->data_promised <= shard->data_allocated);
241 3242662 : shard->data =
242 1621331 : offload_mempool_host_malloc(shard->data_allocated * sizeof(double));
243 1621331 : assert(shard->data != NULL);
244 1621331 : if (data != NULL) {
245 520610 : memcpy(shard->data, data, shard->data_size * sizeof(double));
246 520610 : offload_mempool_host_free(data);
247 : }
248 : }
249 :
250 : // Zero new blocks.
251 : // The following memset is usually the first touch of the memory, which leads
252 : // to frequent page faults. The executing thread determines the NUMA location
253 11571955 : if (shard->data_promised > shard->data_size) {
254 11244382 : const int tail = shard->data_promised - shard->data_size;
255 11244382 : memset(shard->data + shard->data_size, 0, tail * sizeof(double));
256 11244382 : shard->data_size = shard->data_promised;
257 : }
258 11571955 : }
259 :
260 : /*******************************************************************************
261 : * \brief Internal routine for getting block or promising a new one.
262 : * \author Ole Schuett
263 : ******************************************************************************/
264 28941084 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row,
265 : const int col,
266 : const int block_size) {
267 28941084 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
268 28941084 : if (existing_blk != NULL) {
269 : return existing_blk;
270 : } else {
271 26710178 : return dbm_shard_promise_new_block(shard, row, col, block_size);
272 : }
273 : }
274 :
275 : /*******************************************************************************
276 : * \brief Internal routine for getting block or allocating a new one.
277 : * \author Ole Schuett
278 : ******************************************************************************/
279 38554993 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row,
280 : const int col,
281 : const int block_size) {
282 38554993 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
283 38554993 : if (existing_blk != NULL) {
284 : return existing_blk;
285 : }
286 :
287 : // Create a new block.
288 9929703 : dbm_block_t *new_blk =
289 9929703 : dbm_shard_promise_new_block(shard, row, col, block_size);
290 9929703 : dbm_shard_allocate_promised_blocks(shard);
291 :
292 9929703 : return new_blk;
293 : }
294 :
295 : // EOF
|