Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 : #include "dbm_shard.h"
8 : #include "../offload/offload_mempool.h"
9 : #include "dbm_hyperparams.h"
10 :
11 : #include <assert.h>
12 : #include <omp.h>
13 : #include <stdbool.h>
14 : #include <stddef.h>
15 : #include <stdlib.h>
16 : #include <string.h>
17 :
18 : /*******************************************************************************
19 : * \brief Internal routine for finding a power of two greater than given number.
20 : * \author Ole Schuett
21 : ******************************************************************************/
22 8981335 : static int next_power2(const int start) {
23 8981335 : int candidate = 2;
24 32393669 : while (candidate < start) {
25 23412334 : candidate *= 2;
26 : }
27 8981335 : return candidate;
28 : }
29 :
30 : /*******************************************************************************
31 : * \brief Internal routine for finding a prime greater equal than given number.
32 : * \author Ole Schuett
33 : ******************************************************************************/
34 8981335 : static int next_prime(const int start) {
35 8981335 : int candidate = start, divisor = 0;
36 35301919 : while (divisor < candidate) {
37 547500253 : for (divisor = 2; divisor < candidate; divisor++) {
38 538518918 : if (candidate % divisor == 0) {
39 17339249 : candidate++;
40 17339249 : break;
41 : }
42 : }
43 : }
44 8981335 : return candidate;
45 : }
46 :
47 : /*******************************************************************************
48 : * \brief Internal routine for initializing a shard's hashtable.
49 : * \author Ole Schuett
50 : ******************************************************************************/
51 8981335 : static void hashtable_init(dbm_shard_t *shard) {
52 : // Choosing size as power of two allows to replace modulo with bitwise AND.
53 17962670 : shard->hashtable_size =
54 8981335 : next_power2(DBM_HASHTABLE_FACTOR * shard->nblocks_allocated);
55 8981335 : shard->hashtable_prime = next_prime(shard->hashtable_size);
56 8981335 : shard->hashtable = calloc(shard->hashtable_size, sizeof(int));
57 8981335 : assert(shard->hashtable != NULL);
58 8981335 : }
59 :
60 : /*******************************************************************************
61 : * \brief Internal routine for initializing a shard.
62 : * \author Ole Schuett
63 : ******************************************************************************/
64 1956802 : void dbm_shard_init(dbm_shard_t *shard) {
65 1956802 : shard->nblocks = 0;
66 1956802 : shard->nblocks_allocated = 0;
67 1956802 : shard->blocks = NULL;
68 1956802 : hashtable_init(shard);
69 1956802 : shard->data_size = 0;
70 1956802 : shard->data_promised = 0;
71 1956802 : shard->data_allocated = 0;
72 1956802 : shard->data = NULL;
73 1956802 : omp_init_lock(&shard->lock);
74 1956802 : }
75 :
76 : /*******************************************************************************
77 : * \brief Internal routine for copying content of shard_b into shard_a.
78 : * \author Ole Schuett
79 : ******************************************************************************/
80 488798 : void dbm_shard_copy(dbm_shard_t *shard_a, const dbm_shard_t *shard_b) {
81 488798 : assert(shard_a != NULL && shard_b != NULL);
82 :
83 488798 : if (shard_a->nblocks_allocated < shard_b->nblocks) {
84 461139 : free(shard_a->blocks);
85 461139 : shard_a->blocks = malloc(shard_b->nblocks * sizeof(dbm_block_t));
86 461139 : shard_a->nblocks_allocated = shard_b->nblocks;
87 461139 : assert(shard_a->blocks != NULL);
88 : }
89 488798 : shard_a->nblocks = shard_b->nblocks;
90 :
91 488798 : if (shard_a->hashtable_size < shard_b->hashtable_size) {
92 463958 : free(shard_a->hashtable);
93 463958 : shard_a->hashtable = malloc(shard_b->hashtable_size * sizeof(int));
94 463958 : assert(shard_a->hashtable != NULL);
95 : }
96 488798 : shard_a->hashtable_size = shard_b->hashtable_size;
97 488798 : shard_a->hashtable_prime = shard_b->hashtable_prime;
98 :
99 488798 : if (shard_a->data_allocated < shard_b->data_size) {
100 461139 : offload_mempool_host_free(shard_a->data);
101 922278 : shard_a->data =
102 461139 : offload_mempool_host_malloc(shard_b->data_size * sizeof(double));
103 461139 : shard_a->data_allocated = shard_b->data_size;
104 461139 : assert(shard_a->data != NULL);
105 : }
106 488798 : shard_a->data_size = shard_b->data_size;
107 :
108 488798 : if (shard_b->nblocks != 0) {
109 461169 : assert(shard_a->blocks != NULL && shard_b->blocks != NULL);
110 461169 : memcpy(shard_a->blocks, shard_b->blocks,
111 461169 : shard_b->nblocks * sizeof(dbm_block_t));
112 : }
113 488798 : if (shard_b->hashtable_size != 0) {
114 488798 : assert(shard_a->hashtable != NULL && shard_b->hashtable != NULL);
115 488798 : memcpy(shard_a->hashtable, shard_b->hashtable,
116 488798 : shard_b->hashtable_size * sizeof(int));
117 : }
118 488798 : if (shard_b->data_size != 0) {
119 461169 : assert(shard_a->data != NULL && shard_b->data != NULL);
120 461169 : memcpy(shard_a->data, shard_b->data, shard_b->data_size * sizeof(double));
121 : }
122 488798 : }
123 :
124 : /*******************************************************************************
125 : * \brief Internal routine for releasing a shard.
126 : * \author Ole Schuett
127 : ******************************************************************************/
128 1956802 : void dbm_shard_release(dbm_shard_t *shard) {
129 1956802 : free(shard->blocks);
130 1956802 : free(shard->hashtable);
131 1956802 : offload_mempool_host_free(shard->data);
132 1956802 : omp_destroy_lock(&shard->lock);
133 1956802 : }
134 :
135 : /*******************************************************************************
136 : * \brief Private hash function based on Cantor pairing function.
137 : * https://en.wikipedia.org/wiki/Pairing_function#Cantor_pairing_function
138 : * Szudzik's elegant pairing proved to be too asymmetric wrt. row / col.
139 : * Using unsigned int to return a positive number even after overflow.
140 : * \author Ole Schuett
141 : ******************************************************************************/
142 269882477 : static inline unsigned int hash(const unsigned int row,
143 : const unsigned int col) {
144 269882477 : return (row + col) * (row + col + 1) / 2 + row; // Division by 2 is cheap.
145 : }
146 :
147 : /*******************************************************************************
148 : * \brief Internal routine for masking a slot in the hash-table.
149 : * \author Hans Pabst
150 : ******************************************************************************/
151 269882477 : static inline int hashtable_mask(const dbm_shard_t *shard) {
152 269882477 : return shard->hashtable_size - 1;
153 : }
154 :
155 : /*******************************************************************************
156 : * \brief Private routine for inserting a block into a shard's hashtable.
157 : * \author Ole Schuett
158 : ******************************************************************************/
159 144934906 : static void hashtable_insert(dbm_shard_t *shard, const int block_idx) {
160 144934906 : assert(0 <= block_idx && block_idx < shard->nblocks);
161 144934906 : const dbm_block_t *blk = &shard->blocks[block_idx];
162 144934906 : const unsigned int h = hash(blk->row, blk->col);
163 144934906 : int slot = (shard->hashtable_prime * h) & hashtable_mask(shard);
164 154544855 : for (int i = 0; i < shard->hashtable_size; ++i) { // linear probing
165 154544855 : if (shard->hashtable[slot] == 0) { // 0 means empty
166 144934906 : shard->hashtable[slot] = block_idx + 1; // 1-based
167 144934906 : return;
168 : }
169 9609949 : slot = (slot + 1) & hashtable_mask(shard);
170 : }
171 0 : assert(false);
172 : }
173 :
174 : /*******************************************************************************
175 : * \brief Internal routine for looking up a block from a shard.
176 : * \author Ole Schuett
177 : ******************************************************************************/
178 124947571 : dbm_block_t *dbm_shard_lookup(const dbm_shard_t *shard, const int row,
179 : const int col) {
180 124947571 : int slot = (shard->hashtable_prime * hash(row, col)) & hashtable_mask(shard);
181 133834000 : for (int i = 0; i < shard->hashtable_size; ++i) { // linear probing
182 133834000 : const int block_idx = shard->hashtable[slot];
183 133834000 : if (block_idx == 0) { // 1-based, 0 means empty
184 : return NULL; // block not found
185 : }
186 86944175 : assert(0 < block_idx && block_idx <= shard->nblocks);
187 86944175 : dbm_block_t *blk = &shard->blocks[block_idx - 1];
188 86944175 : if (blk->row == row && blk->col == col) {
189 : return blk;
190 : }
191 8886429 : slot = (slot + 1) & hashtable_mask(shard);
192 : }
193 : return NULL;
194 : }
195 :
196 : /*******************************************************************************
197 : * \brief Internal routine for allocating the metadata of a new block.
198 : * \author Ole Schuett
199 : ******************************************************************************/
200 57340674 : dbm_block_t *dbm_shard_promise_new_block(dbm_shard_t *shard, const int row,
201 : const int col, const int block_size) {
202 : // Grow blocks array if necessary.
203 57340674 : if (shard->nblocks_allocated < shard->nblocks + 1) {
204 7024533 : shard->nblocks_allocated = DBM_ALLOCATION_FACTOR * (shard->nblocks + 1);
205 7024533 : assert((shard->nblocks + 1) <= shard->nblocks_allocated);
206 7024533 : shard->blocks =
207 7024533 : realloc(shard->blocks, shard->nblocks_allocated * sizeof(dbm_block_t));
208 7024533 : assert(shard->blocks != NULL);
209 :
210 : // rebuild hashtable
211 7024533 : free(shard->hashtable);
212 7024533 : hashtable_init(shard);
213 94618765 : for (int i = 0; i < shard->nblocks; i++) {
214 87594232 : hashtable_insert(shard, i);
215 : }
216 : }
217 :
218 57340674 : const int new_block_idx = shard->nblocks;
219 57340674 : shard->nblocks++;
220 57340674 : dbm_block_t *new_block = &shard->blocks[new_block_idx];
221 57340674 : new_block->row = row;
222 57340674 : new_block->col = col;
223 57340674 : new_block->offset = shard->data_promised;
224 57340674 : shard->data_promised += block_size;
225 : // The data_size will be increased after the memory is allocated and zeroed.
226 57340674 : hashtable_insert(shard, new_block_idx);
227 57340674 : return new_block;
228 : }
229 :
230 : /*******************************************************************************
231 : * \brief Internal routine for allocating and zeroing any promised block's data.
232 : * \author Ole Schuett
233 : ******************************************************************************/
234 12703248 : void dbm_shard_allocate_promised_blocks(dbm_shard_t *shard) {
235 : // Reallocate data array if necessary.
236 12703248 : if (shard->data_allocated < shard->data_promised) {
237 1821707 : const double *data = shard->data;
238 1821707 : shard->data_allocated = DBM_ALLOCATION_FACTOR * shard->data_promised;
239 1821707 : assert(shard->data_promised <= shard->data_allocated);
240 3643414 : shard->data =
241 1821707 : offload_mempool_host_malloc(shard->data_allocated * sizeof(double));
242 1821707 : assert(shard->data != NULL);
243 1821707 : if (data != NULL) {
244 570246 : memcpy(shard->data, data, shard->data_size * sizeof(double));
245 570246 : offload_mempool_host_free(data);
246 : }
247 : }
248 :
249 : // Zero new blocks.
250 : // The following memset is usually the first touch of the memory, which leads
251 : // to frequent page faults. The executing thread determines the NUMA location
252 12703248 : if (shard->data_size < shard->data_promised) {
253 12335466 : const int tail = shard->data_promised - shard->data_size;
254 12335466 : memset(shard->data + shard->data_size, 0, tail * sizeof(double));
255 12335466 : shard->data_size = shard->data_promised;
256 : }
257 12703248 : }
258 :
259 : /*******************************************************************************
260 : * \brief Internal routine for getting block or promising a new one.
261 : * \author Ole Schuett
262 : ******************************************************************************/
263 31094982 : dbm_block_t *dbm_shard_get_or_promise_block(dbm_shard_t *shard, const int row,
264 : const int col,
265 : const int block_size) {
266 31094982 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
267 31094982 : if (existing_blk != NULL) {
268 : return existing_blk;
269 : } else {
270 28812100 : return dbm_shard_promise_new_block(shard, row, col, block_size);
271 : }
272 : }
273 :
274 : /*******************************************************************************
275 : * \brief Internal routine for getting block or allocating a new one.
276 : * \author Ole Schuett
277 : ******************************************************************************/
278 41623301 : dbm_block_t *dbm_shard_get_or_allocate_block(dbm_shard_t *shard, const int row,
279 : const int col,
280 : const int block_size) {
281 41623301 : dbm_block_t *existing_blk = dbm_shard_lookup(shard, row, col);
282 41623301 : if (existing_blk != NULL) {
283 : return existing_blk;
284 : }
285 :
286 : // Create a new block.
287 10853604 : dbm_block_t *new_blk =
288 10853604 : dbm_shard_promise_new_block(shard, row, col, block_size);
289 10853604 : dbm_shard_allocate_promised_blocks(shard);
290 :
291 10853604 : return new_blk;
292 : }
293 :
294 : // EOF
|