Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 : #include "dbm_shard.h"
8 : #include "../offload/offload_mempool.h"
9 : #include "dbm_hyperparams.h"
10 :
11 : #include <assert.h>
12 : #include <omp.h>
13 : #include <stdbool.h>
14 : #include <stddef.h>
15 : #include <stdlib.h>
16 : #include <string.h>
17 :
18 : /*******************************************************************************
19 : * \brief Internal routine for finding a power of two greater than given number.
20 : * \author Ole Schuett
21 : ******************************************************************************/
22 8992949 : static int next_power2(const int start) {
23 8992949 : int candidate = 2;
24 32429023 : while (candidate < start) {
25 23436074 : candidate *= 2;
26 : }
27 8992949 : return candidate;
28 : }
29 :
30 : /*******************************************************************************
31 : * \brief Internal routine for finding a prime greater equal than given number.
32 : * \author Ole Schuett
33 : ******************************************************************************/
34 8992949 : static int next_prime(const int start) {
35 8992949 : int candidate = start, divisor = 0;
36 35344689 : while (divisor < candidate) {
37 547737015 : for (divisor = 2; divisor < candidate; divisor++) {
38 538744066 : if (candidate % divisor == 0) {
39 17358791 : candidate++;
40 17358791 : break;
41 : }
42 : }
43 : }
44 8992949 : return candidate;
45 : }
46 :
47 : /*******************************************************************************
48 : * \brief Internal routine for initializing a shard's hashtable.
49 : * \author Ole Schuett
50 : ******************************************************************************/
51 8992949 : static void hashtable_init(dbm_shard_t *shard) {
52 : // Choosing size as power of two allows to replace modulo with bitwise AND.
53 17985898 : shard->hashtable_size =
54 8992949 : next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated);
55 8992949 : shard->hashtable_prime = next_prime(shard->hashtable_size);
56 8992949 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int));
57 8992949 : assert(shard->hashtable != NULL);
58 8992949 : }
59 :
60 : /*******************************************************************************
61 : * \brief Internal routine for initializing a shard.
62 : * \author Ole Schuett
63 : ******************************************************************************/
64 1959766 : void dbm_shard_init(dbm_shard_t *shard) {
65 1959766 : shard->nblocks = 0;
66 1959766 : shard->nblocks_allocated = 0;
67 1959766 : shard->blocks = NULL;
68 1959766 : hashtable_init(shard);
69 1959766 : shard->data_size = 0;
70 1959766 : shard->data_promised = 0;
71 1959766 : shard->data_allocated = 0;
72 1959766 : shard->data = NULL;
73 1959766 : omp_init_lock(&shard->lock);
74 1959766 : }
75 :
76 : /*******************************************************************************
77 : * \brief Internal routine for copying content of shard_b into shard_a.
78 : * \author Ole Schuett
79 : ******************************************************************************/
80 489662 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) {
81 489662 : assert(shard_a != NULL && shard_b != NULL);
82 :
83 489662 : if (shard_a->nblocks_allocated < shard_b->nblocks) {
84 461859 : free(shard_a->blocks);
85 461859 : shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t));
86 461859 : shard_a->nblocks_allocated = shard_b->nblocks;
87 461859 : assert(shard_a->blocks != NULL);
88 : }
89 489662 : shard_a->nblocks = shard_b->nblocks;
90 :
91 489662 : if (shard_a->hashtable_size < shard_b->hashtable_size) {
92 464678 : free(shard_a->hashtable);
93 464678 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int));
94 464678 : assert(shard_a->hashtable != NULL);
95 : }
96 489662 : shard_a->hashtable_size = shard_b->hashtable_size;
97 489662 : shard_a->hashtable_prime = shard_b->hashtable_prime;
98 :
99 489662 : if (shard_a->data_allocated < shard_b->data_size) {
100 461859 : offload_mempool_host_free(shard_a->data);
101 923718 : shard_a->data =
102 461859 : offload_mempool_host_malloc(shard_b->data_size * sizeof(double));
103 461859 : shard_a->data_allocated = shard_b->data_size;
104 461859 : assert(shard_a->data != NULL);
105 : }
106 489662 : shard_a->data_size = shard_b->data_size;
107 :
108 489662 : if (shard_b->nblocks != 0) {
109 461889 : assert(shard_a->blocks != NULL && shard_b->blocks != NULL);
110 461889 : memcpy(shard_a->blocks, shard_b->blocks,
111 461889 : shard_b->nblocks * sizeof(dbm_block_t));
112 : }
113 489662 : if (shard_b->hashtable_size != 0) {
114 489662 : assert(shard_a->hashtable != NULL && shard_b->hashtable != NULL);
115 489662 : memcpy(shard_a->hashtable, shard_b->hashtable,
116 489662 : shard_b->hashtable_size * sizeof(int));
117 : }
118 489662 : if (shard_b->data_size != 0) {
119 461889 : assert(shard_a->data != NULL && shard_b->data != NULL);
120 461889 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double));
121 : }
122 489662 : }
123 :
124 : /*******************************************************************************
125 : * \brief Internal routine for releasing a shard.
126 : * \author Ole Schuett
127 : ******************************************************************************/
128 1959766 : void dbm_shard_release(dbm_shard_t *shard) {
129 1959766 : free(shard->blocks);
130 1959766 : free(shard->hashtable);
131 1959766 : offload_mempool_host_free(shard->data);
132 1959766 : omp_destroy_lock(&shard->lock);
133 1959766 : }
134 :
135 : /*******************************************************************************
136 : * \brief Private hash function based on Cantor pairing function.
137 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
138 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col.
139 : * Using unsigned int to return a positive number even after overflow.
140 : * \author Ole Schuett
141 : ******************************************************************************/
142 269986437 : static inline unsigned int hash(const unsigned int row,
143 : const unsigned int col) {
144 269986437 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap.
145 : }
146 :
147 : /*******************************************************************************
148 : * \brief Internal routine for masking a slot in the hash-table.
149 : * \author Hans Pabst
150 : ******************************************************************************/
151 269986437 : static inline int hashtable_mask(const dbm_shard_t *shard) {
152 269986437 : return shard->hashtable_size - 1;
153 : }
154 :
155 : /*******************************************************************************
156 : * \brief Private routine for inserting a block into a shard's hashtable.
157 : * \author Ole Schuett
158 : ******************************************************************************/
159 144989872 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) {
160 144989872 : assert(0 <= block_idx && block_idx < shard->nblocks);
161 144989872 : const dbm_block_t *blk = &shard->blocks[block_idx];
162 144989872 : const unsigned int h = hash(blk->row, blk->col);
163 : // Bounded probe: at most hashtable_size steps (load factor < 1 guarantees
164 : // termination). Avoids unbounded scans when the slot variable wrapped around
165 : // via the mask but the inner iteration continued past the table end.
166 144989872 : int slot = (shard->hashtable_prime * h) & hashtable_mask(shard);
167 154598156 : for (int i = 0; i < shard->hashtable_size; ++i) { // linear probing
168 154598156 : if (shard->hashtable[slot] == 0) { // 0 means empty
169 144989872 : shard->hashtable[slot] = block_idx + 1; // 1-based
170 144989872 : return;
171 : }
172 9608284 : slot = (slot + 1) & hashtable_mask(shard);
173 : }
174 0 : assert(false);
175 : }
176 :
177 : /*******************************************************************************
178 : * \brief Internal routine for looking up a block from a shard.
179 : * \author Ole Schuett
180 : ******************************************************************************/
181 124996565 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row,
182 : const int col) {
183 : // Bounded probe count prevents scanning the entire table on a miss when
184 : // clusters exist (previous code could re-enter via slot wrap and re-scan).
185 124996565 : int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard);
186 133881917 : for (int i = 0; i < shard->hashtable_size; ++i) { // linear probing
187 133881917 : const int block_idx = shard->hashtable[slot];
188 133881917 : if (block_idx == 0) { // 1-based, 0 means empty
189 : return NULL; // block not found
190 : }
191 86973562 : assert(0 < block_idx && block_idx <= shard->nblocks);
192 86973562 : dbm_block_t *blk = &shard->blocks[block_idx - 1];
193 86973562 : if (blk->row == row && blk->col == col) {
194 : return blk;
195 : }
196 8885352 : slot = (slot + 1) & hashtable_mask(shard);
197 : }
198 : return NULL;
199 : }
200 :
201 : /*******************************************************************************
202 : * \brief Internal routine for allocating the metadata of a new block.
203 : * \author Ole Schuett
204 : ******************************************************************************/
205 57364476 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row,
206 : const int col, const int block_size) {
207 : // Grow blocks array if necessary.
208 57364476 : if (shard->nblocks_allocated < shard->nblocks + 1) {
209 7033183 : shard->nblocks_allocated = DBM_ALLOCATION_FACTOR * (shard->nblocks + 1);
210 7033183 : assert((shard->nblocks + 1) <= shard->nblocks_allocated);
211 7033183 : shard->blocks =
212 7033183 : realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t));
213 7033183 : assert(shard->blocks != NULL);
214 :
215 : // rebuild hashtable
216 7033183 : free(shard->hashtable);
217 7033183 : hashtable_init(shard);
218 94658579 : for (int i = 0; i < shard->nblocks; i++) {
219 87625396 : hashtable_insert(shard, i);
220 : }
221 : }
222 :
223 57364476 : const int new_block_idx = shard->nblocks;
224 57364476 : shard->nblocks++;
225 57364476 : dbm_block_t *new_block = &shard->blocks[new_block_idx];
226 57364476 : new_block->row = row;
227 57364476 : new_block->col = col;
228 57364476 : new_block->offset = shard->data_promised;
229 57364476 : shard->data_promised += block_size;
230 : // The data_size will be increased after the memory is allocated and zeroed.
231 57364476 : hashtable_insert(shard, new_block_idx);
232 57364476 : return new_block;
233 : }
234 :
235 : /*******************************************************************************
236 : * \brief Internal routine for allocating and zeroing any promised block's data.
237 : * \author Ole Schuett
238 : ******************************************************************************/
239 12709772 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) {
240 : // Reallocate data array if necessary.
241 12709772 : if (shard->data_allocated < shard->data_promised) {
242 1824228 : const double *data = shard->data;
243 1824228 : shard->data_allocated = DBM_ALLOCATION_FACTOR * shard->data_promised;
244 1824228 : assert(shard->data_promised <= shard->data_allocated);
245 3648456 : shard->data =
246 1824228 : offload_mempool_host_malloc(shard->data_allocated * sizeof(double));
247 1824228 : assert(shard->data != NULL);
248 1824228 : if (data != NULL) {
249 571029 : memcpy(shard->data, data, shard->data_size * sizeof(double));
250 571029 : offload_mempool_host_free(data);
251 : }
252 : }
253 :
254 : // Zero new blocks.
255 : // The following memset is usually the first touch of the memory, which leads
256 : // to frequent page faults. The executing thread determines the NUMA location
257 12709772 : if (shard->data_size < shard->data_promised) {
258 12341152 : const int tail = shard->data_promised - shard->data_size;
259 12341152 : memset(shard->data + shard->data_size, 0, tail * sizeof(double));
260 12341152 : shard->data_size = shard->data_promised;
261 : }
262 12709772 : }
263 :
264 : /*******************************************************************************
265 : * \brief Internal routine for getting block or promising a new one.
266 : * \author Ole Schuett
267 : ******************************************************************************/
268 31108986 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row,
269 : const int col,
270 : const int block_size) {
271 31108986 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
272 31108986 : if (existing_blk != NULL) {
273 : return existing_blk;
274 : } else {
275 28823477 : return dbm_shard_promise_new_block(shard, row, col, block_size);
276 : }
277 : }
278 :
279 : /*******************************************************************************
280 : * \brief Internal routine for getting block or allocating a new one.
281 : * \author Ole Schuett
282 : ******************************************************************************/
283 41640491 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row,
284 : const int col,
285 : const int block_size) {
286 41640491 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
287 41640491 : if (existing_blk != NULL) {
288 : return existing_blk;
289 : }
290 :
291 : // Create a new block.
292 10856936 : dbm_block_t *new_blk =
293 10856936 : dbm_shard_promise_new_block(shard, row, col, block_size);
294 10856936 : dbm_shard_allocate_promised_blocks(shard);
295 :
296 10856936 : return new_blk;
297 : }
298 :
299 : // EOF
|