Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 :
8 : #include <assert.h>
9 : #include <math.h>
10 : #include <omp.h>
11 : #include <stdbool.h>
12 : #include <stddef.h>
13 : #include <stdlib.h>
14 : #include <string.h>
15 :
16 : #include "dbm_hyperparams.h"
17 : #include "dbm_matrix.h"
18 :
19 : /*******************************************************************************
20 : * \brief Creates a new matrix.
21 : * \author Ole Schuett
22 : ******************************************************************************/
23 1684494 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
24 : const char name[], const int nrows, const int ncols,
25 : const int row_sizes[nrows], const int col_sizes[ncols]) {
26 1684494 : assert(omp_get_num_threads() == 1);
27 :
28 1684494 : dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
29 :
30 1684494 : assert(dist->rows.length == nrows);
31 1684494 : assert(dist->cols.length == ncols);
32 1684494 : dbm_distribution_hold(dist);
33 1684494 : matrix->dist = dist;
34 :
35 1684494 : size_t size = (strlen(name) + 1) * sizeof(char);
36 1684494 : matrix->name = malloc(size);
37 1684494 : assert(matrix->name != NULL && name != NULL);
38 1684494 : memcpy(matrix->name, name, size);
39 :
40 1684494 : matrix->nrows = nrows;
41 1684494 : matrix->ncols = ncols;
42 :
43 1684494 : size = nrows * sizeof(int);
44 1684494 : matrix->row_sizes = malloc(size);
45 1684494 : assert(matrix->row_sizes != NULL || size == 0);
46 1684494 : if (size != 0) {
47 1683934 : memcpy(matrix->row_sizes, row_sizes, size);
48 : }
49 :
50 1684494 : size = ncols * sizeof(int);
51 1684494 : matrix->col_sizes = malloc(size);
52 1684494 : assert(matrix->col_sizes != NULL || size == 0);
53 1684494 : if (size != 0) {
54 1680330 : memcpy(matrix->col_sizes, col_sizes, size);
55 : }
56 :
57 1684494 : const int num_shards = dbm_get_num_shards(matrix);
58 1684494 : matrix->shards = malloc(num_shards * sizeof(dbm_shard_t));
59 1684494 : assert(matrix->shards != NULL || num_shards == 0);
60 1684494 : #pragma omp parallel for
61 : for (int ishard = 0; ishard < num_shards; ishard++) {
62 : dbm_shard_init(&matrix->shards[ishard]);
63 : }
64 :
65 1684494 : assert(*matrix_out == NULL);
66 1684494 : *matrix_out = matrix;
67 1684494 : }
68 :
69 : /*******************************************************************************
70 : * \brief Releases a matrix and all its ressources.
71 : * \author Ole Schuett
72 : ******************************************************************************/
73 1684494 : void dbm_release(dbm_matrix_t *matrix) {
74 1684494 : assert(omp_get_num_threads() == 1);
75 3368988 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
76 1684494 : dbm_shard_release(&matrix->shards[ishard]);
77 : }
78 1684494 : dbm_distribution_release(matrix->dist);
79 1684494 : free(matrix->row_sizes);
80 1684494 : free(matrix->col_sizes);
81 1684494 : free(matrix->shards);
82 1684494 : free(matrix->name);
83 1684494 : free(matrix);
84 1684494 : }
85 :
86 : /*******************************************************************************
87 : * \brief Copies content of matrix_b into matrix_a.
88 : * Matrices must have the same row/col block sizes and distribution.
89 : * \author Ole Schuett
90 : ******************************************************************************/
91 421322 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
92 421322 : assert(omp_get_num_threads() == 1);
93 :
94 421322 : assert(matrix_b->nrows == matrix_a->nrows);
95 5275008 : for (int i = 0; i < matrix_b->nrows; i++) {
96 4853686 : assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
97 : }
98 421322 : assert(matrix_b->ncols == matrix_a->ncols);
99 27217767 : for (int i = 0; i < matrix_b->ncols; i++) {
100 26796445 : assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
101 : }
102 :
103 421322 : assert(matrix_a->dist == matrix_b->dist);
104 :
105 421322 : #pragma omp parallel for DBM_OMP_SCHEDULE
106 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
107 : dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
108 : }
109 421322 : }
110 :
111 : /*******************************************************************************
112 : * \brief Copies content of matrix_b into matrix_a.
113 : * Matrices may have different distributions.
114 : * \author Ole Schuett
115 : ******************************************************************************/
116 144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
117 144 : assert(omp_get_num_threads() == 1);
118 144 : assert(matrix->nrows == redist->nrows);
119 6384 : for (int i = 0; i < matrix->nrows; i++) {
120 6240 : assert(matrix->row_sizes[i] == redist->row_sizes[i]);
121 : }
122 144 : assert(matrix->ncols == redist->ncols);
123 6384 : for (int i = 0; i < matrix->ncols; i++) {
124 6240 : assert(matrix->col_sizes[i] == redist->col_sizes[i]);
125 : }
126 :
127 144 : assert(dbm_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
128 144 : const dbm_mpi_comm_t comm = redist->dist->comm;
129 144 : const int nranks = dbm_mpi_comm_size(comm);
130 :
131 : // 1st pass: Compute send_count.
132 144 : int send_count[nranks];
133 144 : memset(send_count, 0, nranks * sizeof(int));
134 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
135 144 : dbm_shard_t *shard = &matrix->shards[ishard];
136 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
137 8792 : const dbm_block_t *blk = &shard->blocks[iblock];
138 8792 : const int row_size = matrix->row_sizes[blk->row];
139 8792 : const int col_size = matrix->col_sizes[blk->col];
140 8792 : const int block_size = row_size * col_size;
141 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
142 8792 : assert(0 <= rank && rank < nranks);
143 8792 : send_count[rank] += 2 + block_size;
144 : }
145 : }
146 :
147 : // 1st communication: Exchange counts.
148 144 : int recv_count[nranks];
149 144 : dbm_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
150 :
151 : // Compute displacements and allocate data buffers.
152 144 : int send_displ[nranks + 1], recv_displ[nranks + 1];
153 144 : send_displ[0] = recv_displ[0] = 0;
154 432 : for (int irank = 1; irank <= nranks; irank++) {
155 288 : send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
156 288 : recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
157 : }
158 144 : const int total_send_count = send_displ[nranks];
159 144 : const int total_recv_count = recv_displ[nranks];
160 144 : double *data_send = dbm_mpi_alloc_mem(total_send_count * sizeof(double));
161 144 : double *data_recv = dbm_mpi_alloc_mem(total_recv_count * sizeof(double));
162 :
163 : // 2nd pass: Fill send_data.
164 144 : int send_data_positions[nranks];
165 144 : memcpy(send_data_positions, send_displ, nranks * sizeof(int));
166 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
167 144 : dbm_shard_t *shard = &matrix->shards[ishard];
168 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
169 8792 : const dbm_block_t *blk = &shard->blocks[iblock];
170 8792 : const double *blk_data = &shard->data[blk->offset];
171 8792 : const int row_size = matrix->row_sizes[blk->row];
172 8792 : const int col_size = matrix->col_sizes[blk->col];
173 8792 : const int block_size = row_size * col_size;
174 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
175 8792 : const int pos = send_data_positions[rank];
176 8792 : data_send[pos + 0] = blk->row; // send integers as doubles
177 8792 : data_send[pos + 1] = blk->col;
178 8792 : memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
179 8792 : send_data_positions[rank] += 2 + block_size;
180 : }
181 : }
182 432 : for (int irank = 0; irank < nranks; irank++) {
183 288 : assert(send_data_positions[irank] == send_displ[irank + 1]);
184 : }
185 :
186 : // 2nd communication: Exchange data.
187 144 : dbm_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
188 : recv_count, recv_displ, comm);
189 144 : dbm_mpi_free_mem(data_send);
190 :
191 : // 3rd pass: Unpack data.
192 144 : dbm_clear(redist);
193 144 : int recv_data_pos = 0;
194 8936 : while (recv_data_pos < total_recv_count) {
195 8792 : const int row = (int)data_recv[recv_data_pos + 0];
196 8792 : const int col = (int)data_recv[recv_data_pos + 1];
197 8792 : assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
198 8792 : assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
199 8792 : dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
200 8792 : const int row_size = matrix->row_sizes[row];
201 8792 : const int col_size = matrix->col_sizes[col];
202 8792 : const int block_size = row_size * col_size;
203 8792 : recv_data_pos += 2 + block_size;
204 : }
205 144 : assert(recv_data_pos == total_recv_count);
206 144 : dbm_mpi_free_mem(data_recv);
207 144 : }
208 :
209 : /*******************************************************************************
210 : * \brief Looks up a block from given matrics. This routine is thread-safe.
211 : * If the block is not found then a null pointer is returned.
212 : * \author Ole Schuett
213 : ******************************************************************************/
214 21370209 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
215 : double **block, int *row_size, int *col_size) {
216 21370209 : assert(0 <= row && row < matrix->nrows);
217 21370209 : assert(0 <= col && col < matrix->ncols);
218 21370209 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
219 21370209 : *row_size = matrix->row_sizes[row];
220 21370209 : *col_size = matrix->col_sizes[col];
221 21370209 : *block = NULL;
222 :
223 21370209 : const int ishard = dbm_get_shard_index(matrix, row, col);
224 21370209 : const dbm_shard_t *shard = &matrix->shards[ishard];
225 21370209 : dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
226 21370209 : if (blk != NULL) {
227 19803145 : *block = &shard->data[blk->offset];
228 : }
229 21370209 : }
230 :
231 : /*******************************************************************************
232 : * \brief Adds a block to given matrix. This routine is thread-safe.
233 : * If block already exist then it gets overwritten (or summed).
234 : * \author Ole Schuett
235 : ******************************************************************************/
236 34182882 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
237 : const bool summation, const double *block) {
238 34182882 : assert(0 <= row && row < matrix->nrows);
239 34182882 : assert(0 <= col && col < matrix->ncols);
240 34182882 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
241 34182882 : const int row_size = matrix->row_sizes[row];
242 34182882 : const int col_size = matrix->col_sizes[col];
243 34182882 : const int block_size = row_size * col_size;
244 :
245 34182882 : const int ishard = dbm_get_shard_index(matrix, row, col);
246 34182882 : dbm_shard_t *shard = &matrix->shards[ishard];
247 34182882 : omp_set_lock(&shard->lock);
248 34182882 : dbm_block_t *blk =
249 34182882 : dbm_shard_get_or_allocate_block(shard, row, col, block_size);
250 34182882 : double *blk_data = &shard->data[blk->offset];
251 34182882 : if (summation) {
252 3657777 : assert(blk_data != NULL || 0 == block_size);
253 1292510007 : for (int i = 0; i < block_size; i++) {
254 1288852230 : blk_data[i] += block[i];
255 : }
256 30525105 : } else if (block_size != 0) {
257 30520006 : memcpy(blk_data, block, block_size * sizeof(double));
258 : }
259 34182882 : omp_unset_lock(&shard->lock);
260 34182882 : }
261 :
262 : /*******************************************************************************
263 : * \brief Remove all blocks from matrix, but does not release underlying memory.
264 : * \author Ole Schuett
265 : ******************************************************************************/
266 2022940 : void dbm_clear(dbm_matrix_t *matrix) {
267 2022940 : assert(omp_get_num_threads() == 1);
268 :
269 2022940 : #pragma omp parallel for DBM_OMP_SCHEDULE
270 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
271 : dbm_shard_t *shard = &matrix->shards[ishard];
272 : shard->nblocks = 0;
273 : shard->data_size = 0;
274 : shard->data_promised = 0;
275 : // Does not deallocate memory, hence data_allocated remains unchanged.
276 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
277 : }
278 2022940 : }
279 :
280 : /*******************************************************************************
281 : * \brief Removes all blocks from the matrix whose norm is below the threshold.
282 : * Blocks of size zero are always kept.
283 : * \author Ole Schuett
284 : ******************************************************************************/
285 602729 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
286 602729 : assert(omp_get_num_threads() == 1);
287 :
288 602729 : if (eps == 0.0) {
289 : return;
290 : }
291 561951 : const double eps2 = eps * eps;
292 :
293 561951 : #pragma omp parallel for DBM_OMP_SCHEDULE
294 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
295 : dbm_shard_t *shard = &matrix->shards[ishard];
296 : const int old_nblocks = shard->nblocks;
297 : shard->nblocks = 0;
298 : shard->data_promised = 0;
299 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
300 :
301 : for (int iblock = 0; iblock < old_nblocks; iblock++) {
302 : const dbm_block_t old_blk = shard->blocks[iblock];
303 : const double *old_blk_data = &shard->data[old_blk.offset];
304 : const int row_size = matrix->row_sizes[old_blk.row];
305 : const int col_size = matrix->col_sizes[old_blk.col];
306 : const int block_size = row_size * col_size;
307 : double norm = 0.0;
308 : for (int i = 0; i < block_size; i++) {
309 : const double value = old_blk_data[i];
310 : norm += value * value;
311 : if (eps2 <= norm) {
312 : break;
313 : }
314 : }
315 : // For historic reasons zero-sized blocks are never filtered.
316 : if (block_size > 0 && norm < eps2) {
317 : continue; // filter the block
318 : }
319 : // Re-create block.
320 : dbm_block_t *new_blk = dbm_shard_promise_new_block(
321 : shard, old_blk.row, old_blk.col, block_size);
322 : assert(new_blk->offset <= old_blk.offset);
323 : if (new_blk->offset != old_blk.offset) {
324 : // Using memmove because it can handle overlapping buffers.
325 : double *new_blk_data = &shard->data[new_blk->offset];
326 : memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
327 : }
328 : }
329 : shard->data_size = shard->data_promised;
330 : // TODO: Could call realloc to release excess memory.
331 : }
332 : }
333 :
334 : /*******************************************************************************
335 : * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
336 : * This routine must always be called within an OpenMP parallel region.
337 : * \author Ole Schuett
338 : ******************************************************************************/
339 1428081 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
340 : const int rows[], const int cols[]) {
341 1428081 : assert(omp_get_num_threads() == omp_get_max_threads() &&
342 : "Please call dbm_reserve_blocks within an OpenMP parallel region.");
343 1428081 : const int my_rank = matrix->dist->my_rank;
344 30489283 : for (int i = 0; i < nblocks; i++) {
345 29061202 : const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
346 29061202 : dbm_shard_t *shard = &matrix->shards[ishard];
347 29061202 : omp_set_lock(&shard->lock);
348 29061202 : assert(0 <= rows[i] && rows[i] < matrix->nrows);
349 29061202 : assert(0 <= cols[i] && cols[i] < matrix->ncols);
350 29061202 : assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
351 29061202 : const int row_size = matrix->row_sizes[rows[i]];
352 29061202 : const int col_size = matrix->col_sizes[cols[i]];
353 29061202 : const int block_size = row_size * col_size;
354 29061202 : dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
355 29061202 : omp_unset_lock(&shard->lock);
356 : }
357 1428081 : #pragma omp barrier
358 :
359 : #pragma omp for DBM_OMP_SCHEDULE
360 1428081 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
361 1428081 : dbm_shard_t *shard = &matrix->shards[ishard];
362 1428081 : dbm_shard_allocate_promised_blocks(shard);
363 : }
364 1428081 : }
365 :
366 : /*******************************************************************************
367 : * \brief Multiplies all entries in the given matrix by the given factor alpha.
368 : * \author Ole Schuett
369 : ******************************************************************************/
370 453308 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
371 453308 : assert(omp_get_num_threads() == 1);
372 453308 : if (alpha == 1.0) {
373 : return;
374 : }
375 342903 : if (alpha == 0.0) {
376 332211 : dbm_zero(matrix);
377 332211 : return;
378 : }
379 :
380 10692 : #pragma omp parallel for DBM_OMP_SCHEDULE
381 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
382 : dbm_shard_t *shard = &matrix->shards[ishard];
383 : for (int i = 0; i < shard->data_size; i++) {
384 : shard->data[i] *= alpha;
385 : }
386 : }
387 : }
388 :
389 : /*******************************************************************************
390 : * \brief Sets all blocks in the given matrix to zero.
391 : * \author Ole Schuett
392 : ******************************************************************************/
393 332211 : void dbm_zero(dbm_matrix_t *matrix) {
394 332211 : assert(omp_get_num_threads() == 1);
395 :
396 332211 : #pragma omp parallel for DBM_OMP_SCHEDULE
397 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
398 : dbm_shard_t *shard = &matrix->shards[ishard];
399 : if (shard->data != NULL) {
400 : memset(shard->data, 0, shard->data_size * sizeof(double));
401 : }
402 : }
403 332211 : }
404 :
405 : /*******************************************************************************
406 : * \brief Adds matrix_b to matrix_a.
407 : * \author Ole Schuett
408 : ******************************************************************************/
409 208290 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
410 208290 : assert(omp_get_num_threads() == 1);
411 208290 : assert(matrix_a->dist == matrix_b->dist);
412 :
413 208290 : #pragma omp parallel for DBM_OMP_SCHEDULE
414 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
415 : dbm_shard_t *shard_a = &matrix_a->shards[ishard];
416 : const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
417 : for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
418 : const dbm_block_t blk_b = shard_b->blocks[iblock];
419 :
420 : const int row_size = matrix_b->row_sizes[blk_b.row];
421 : const int col_size = matrix_b->col_sizes[blk_b.col];
422 : assert(row_size == matrix_a->row_sizes[blk_b.row]);
423 : assert(col_size == matrix_a->col_sizes[blk_b.col]);
424 : const int block_size = row_size * col_size;
425 : dbm_block_t *blk_a = dbm_shard_get_or_allocate_block(
426 : shard_a, blk_b.row, blk_b.col, block_size);
427 : double *data_a = &shard_a->data[blk_a->offset];
428 : const double *data_b = &shard_b->data[blk_b.offset];
429 : for (int i = 0; i < block_size; i++) {
430 : data_a[i] += data_b[i];
431 : }
432 : }
433 : }
434 208290 : }
435 :
436 : /*******************************************************************************
437 : * \brief Creates an iterator for the blocks of the given matrix.
438 : * The iteration order is not stable.
439 : * This routine must always be called within an OpenMP parallel region.
440 : * \author Ole Schuett
441 : ******************************************************************************/
442 3347846 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
443 3347846 : assert(omp_get_num_threads() == omp_get_max_threads() &&
444 : "Please call dbm_iterator_start within an OpenMP parallel region.");
445 3347846 : dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
446 3347846 : assert(iter != NULL);
447 3347846 : iter->matrix = matrix;
448 3347846 : iter->next_block = 0;
449 3347846 : iter->next_shard = omp_get_thread_num();
450 3885561 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
451 3347846 : matrix->shards[iter->next_shard].nblocks == 0) {
452 537715 : iter->next_shard += omp_get_num_threads();
453 : }
454 3347846 : assert(*iter_out == NULL);
455 3347846 : *iter_out = iter;
456 3347846 : }
457 :
458 : /*******************************************************************************
459 : * \brief Returns number of blocks the iterator will provide to calling thread.
460 : * \author Ole Schuett
461 : ******************************************************************************/
462 632927 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
463 632927 : int num_blocks = 0;
464 632927 : for (int ishard = omp_get_thread_num();
465 1265854 : ishard < dbm_get_num_shards(iter->matrix);
466 632927 : ishard += omp_get_num_threads()) {
467 632927 : num_blocks += iter->matrix->shards[ishard].nblocks;
468 : }
469 632927 : return num_blocks;
470 : }
471 :
472 : /*******************************************************************************
473 : * \brief Tests whether the given iterator has any block left.
474 : * \author Ole Schuett
475 : ******************************************************************************/
476 59853509 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
477 59853509 : return iter->next_shard < dbm_get_num_shards(iter->matrix);
478 : }
479 :
480 : /*******************************************************************************
481 : * \brief Returns the next block from the given iterator.
482 : * \author Ole Schuett
483 : ******************************************************************************/
484 66515068 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
485 : double **block, int *row_size, int *col_size) {
486 66515068 : const dbm_matrix_t *matrix = iter->matrix;
487 66515068 : assert(iter->next_shard < dbm_get_num_shards(matrix));
488 66515068 : const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
489 66515068 : assert(iter->next_block < shard->nblocks);
490 66515068 : dbm_block_t *blk = &shard->blocks[iter->next_block];
491 :
492 66515068 : *row = blk->row;
493 66515068 : *col = blk->col;
494 66515068 : *row_size = matrix->row_sizes[blk->row];
495 66515068 : *col_size = matrix->col_sizes[blk->col];
496 66515068 : *block = &shard->data[blk->offset];
497 :
498 66515068 : iter->next_block++;
499 66515068 : if (iter->next_block >= shard->nblocks) {
500 : // Advance to the next non-empty shard...
501 2810131 : iter->next_shard += omp_get_num_threads();
502 2810131 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
503 0 : matrix->shards[iter->next_shard].nblocks == 0) {
504 0 : iter->next_shard += omp_get_num_threads();
505 : }
506 2810131 : iter->next_block = 0; // ...and continue with its first block.
507 : }
508 66515068 : }
509 :
510 : /*******************************************************************************
511 : * \brief Releases the given iterator.
512 : * \author Ole Schuett
513 : ******************************************************************************/
514 3347846 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
515 :
516 : /*******************************************************************************
517 : * \brief Private routine to accumulate using Kahan's summation.
518 : * \author Hans Pabst
519 : ******************************************************************************/
520 14383468 : static double kahan_sum(double value, double *accumulator,
521 : double *compensation) {
522 14383468 : double r, c;
523 14383468 : assert(NULL != accumulator && NULL != compensation);
524 14383468 : c = value - *compensation;
525 14383468 : r = *accumulator + c;
526 14383468 : *compensation = (r - *accumulator) - c;
527 14383468 : *accumulator = r;
528 14383468 : return r;
529 : }
530 :
531 : /*******************************************************************************
532 : * \brief Computes a checksum of the given matrix.
533 : * \author Ole Schuett
534 : ******************************************************************************/
535 190 : double dbm_checksum(const dbm_matrix_t *matrix) {
536 190 : double checksum = 0.0, compensation = 0.0;
537 380 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
538 190 : const dbm_shard_t *shard = &matrix->shards[ishard];
539 14383658 : for (int i = 0; i < shard->data_size; i++) {
540 14383468 : const double value = shard->data[i];
541 14383468 : kahan_sum(value * value, &checksum, &compensation);
542 : }
543 : }
544 190 : dbm_mpi_sum_double(&checksum, 1, matrix->dist->comm);
545 190 : return checksum;
546 : }
547 :
548 : /*******************************************************************************
549 : * \brief Returns the largest absolute value of all matrix elements.
550 : * \author Ole Schuett
551 : ******************************************************************************/
552 48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
553 48 : double maxabs = 0.0;
554 96 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
555 48 : const dbm_shard_t *shard = &matrix->shards[ishard];
556 1599050 : for (int i = 0; i < shard->data_size; i++) {
557 1599002 : maxabs = fmax(maxabs, fabs(shard->data[i]));
558 : }
559 : }
560 48 : dbm_mpi_max_double(&maxabs, 1, matrix->dist->comm);
561 48 : return maxabs;
562 : }
563 :
564 : /*******************************************************************************
565 : * \brief Returns the name of the matrix of the given matrix.
566 : * \author Ole Schuett
567 : ******************************************************************************/
568 1748211 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
569 :
570 : /*******************************************************************************
571 : * \brief Returns the number of local Non-Zero Elements of the given matrix.
572 : * \author Ole Schuett
573 : ******************************************************************************/
574 1856510 : int dbm_get_nze(const dbm_matrix_t *matrix) {
575 1856510 : int nze = 0;
576 3713020 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
577 1856510 : nze += matrix->shards[ishard].data_size;
578 : }
579 1856510 : return nze;
580 : }
581 :
582 : /*******************************************************************************
583 : * \brief Returns the number of local blocks of the given matrix.
584 : * \author Ole Schuett
585 : ******************************************************************************/
586 1020888 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
587 1020888 : int nblocks = 0;
588 2041776 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
589 1020888 : nblocks += matrix->shards[ishard].nblocks;
590 : }
591 1020888 : return nblocks;
592 : }
593 :
594 : /*******************************************************************************
595 : * \brief Returns the row block sizes of the given matrix.
596 : * \author Ole Schuett
597 : ******************************************************************************/
598 4762462 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
599 : const int **row_sizes) {
600 4762462 : *nrows = matrix->nrows;
601 4762462 : *row_sizes = matrix->row_sizes;
602 4762462 : }
603 :
604 : /*******************************************************************************
605 : * \brief Returns the column block sizes of the given matrix.
606 : * \author Ole Schuett
607 : ******************************************************************************/
608 3791884 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
609 : const int **col_sizes) {
610 3791884 : *ncols = matrix->ncols;
611 3791884 : *col_sizes = matrix->col_sizes;
612 3791884 : }
613 :
614 : /*******************************************************************************
615 : * \brief Returns the local row block sizes of the given matrix.
616 : * \author Ole Schuett
617 : ******************************************************************************/
618 264756 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
619 : const int **local_rows) {
620 264756 : *nlocal_rows = matrix->dist->rows.nlocals;
621 264756 : *local_rows = matrix->dist->rows.local_indicies;
622 264756 : }
623 :
624 : /*******************************************************************************
625 : * \brief Returns the local column block sizes of the given matrix.
626 : * \author Ole Schuett
627 : ******************************************************************************/
628 122256 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
629 : const int **local_cols) {
630 122256 : *nlocal_cols = matrix->dist->cols.nlocals;
631 122256 : *local_cols = matrix->dist->cols.local_indicies;
632 122256 : }
633 :
634 : /*******************************************************************************
635 : * \brief Returns the MPI rank on which the given block should be stored.
636 : * \author Ole Schuett
637 : ******************************************************************************/
638 92379655 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
639 : const int col) {
640 92379655 : return dbm_distribution_stored_coords(matrix->dist, row, col);
641 : }
642 :
643 : /*******************************************************************************
644 : * \brief Returns the distribution of the given matrix.
645 : * \author Ole Schuett
646 : ******************************************************************************/
647 1251167 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
648 1251167 : return matrix->dist;
649 : }
650 :
651 : // EOF
|