Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2025 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 : #include "dbm_matrix.h"
8 : #include "dbm_hyperparams.h"
9 :
10 : #include <assert.h>
11 : #include <math.h>
12 : #include <omp.h>
13 : #include <stdbool.h>
14 : #include <stddef.h>
15 : #include <stdlib.h>
16 : #include <string.h>
17 :
18 : /*******************************************************************************
19 : * \brief Creates a new matrix.
20 : * \author Ole Schuett
21 : ******************************************************************************/
22 1717896 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
23 : const char name[], const int nrows, const int ncols,
24 : const int row_sizes[nrows], const int col_sizes[ncols]) {
25 1717896 : assert(omp_get_num_threads() == 1);
26 :
27 1717896 : dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
28 :
29 1717896 : assert(dist->rows.length == nrows);
30 1717896 : assert(dist->cols.length == ncols);
31 1717896 : dbm_distribution_hold(dist);
32 1717896 : matrix->dist = dist;
33 :
34 1717896 : size_t size = (strlen(name) + 1) * sizeof(char);
35 1717896 : matrix->name = malloc(size);
36 1717896 : assert(matrix->name != NULL && name != NULL);
37 1717896 : memcpy(matrix->name, name, size);
38 :
39 1717896 : matrix->nrows = nrows;
40 1717896 : matrix->ncols = ncols;
41 :
42 1717896 : size = nrows * sizeof(int);
43 1717896 : matrix->row_sizes = malloc(size);
44 1717896 : assert(matrix->row_sizes != NULL || size == 0);
45 1717896 : if (size != 0) {
46 1717336 : memcpy(matrix->row_sizes, row_sizes, size);
47 : }
48 :
49 1717896 : size = ncols * sizeof(int);
50 1717896 : matrix->col_sizes = malloc(size);
51 1717896 : assert(matrix->col_sizes != NULL || size == 0);
52 1717896 : if (size != 0) {
53 1713732 : memcpy(matrix->col_sizes, col_sizes, size);
54 : }
55 :
56 1717896 : const int num_shards = dbm_get_num_shards(matrix);
57 1717896 : matrix->shards = malloc(num_shards * sizeof(dbm_shard_t));
58 1717896 : assert(matrix->shards != NULL || num_shards == 0);
59 1717896 : #pragma omp parallel for
60 : for (int ishard = 0; ishard < num_shards; ishard++) {
61 : dbm_shard_init(&matrix->shards[ishard]);
62 : }
63 :
64 1717896 : assert(*matrix_out == NULL);
65 1717896 : *matrix_out = matrix;
66 1717896 : }
67 :
68 : /*******************************************************************************
69 : * \brief Releases a matrix and all its ressources.
70 : * \author Ole Schuett
71 : ******************************************************************************/
72 1717896 : void dbm_release(dbm_matrix_t *matrix) {
73 1717896 : assert(omp_get_num_threads() == 1);
74 3435792 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
75 1717896 : dbm_shard_release(&matrix->shards[ishard]);
76 : }
77 1717896 : dbm_distribution_release(matrix->dist);
78 1717896 : free(matrix->row_sizes);
79 1717896 : free(matrix->col_sizes);
80 1717896 : free(matrix->shards);
81 1717896 : free(matrix->name);
82 1717896 : free(matrix);
83 1717896 : }
84 :
85 : /*******************************************************************************
86 : * \brief Copies content of matrix_b into matrix_a.
87 : * Matrices must have the same row/col block sizes and distribution.
88 : * \author Ole Schuett
89 : ******************************************************************************/
90 431186 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
91 431186 : assert(omp_get_num_threads() == 1);
92 :
93 431186 : assert(matrix_b->nrows == matrix_a->nrows);
94 5309136 : for (int i = 0; i < matrix_b->nrows; i++) {
95 4877950 : assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
96 : }
97 431186 : assert(matrix_b->ncols == matrix_a->ncols);
98 27251799 : for (int i = 0; i < matrix_b->ncols; i++) {
99 26820613 : assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
100 : }
101 :
102 431186 : assert(matrix_a->dist == matrix_b->dist);
103 :
104 431186 : #pragma omp parallel for DBM_OMP_SCHEDULE
105 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
106 : dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
107 : }
108 431186 : }
109 :
110 : /*******************************************************************************
111 : * \brief Copies content of matrix_b into matrix_a.
112 : * Matrices may have different distributions.
113 : * \author Ole Schuett
114 : ******************************************************************************/
115 144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
116 144 : assert(omp_get_num_threads() == 1);
117 144 : assert(matrix->nrows == redist->nrows);
118 6384 : for (int i = 0; i < matrix->nrows; i++) {
119 6240 : assert(matrix->row_sizes[i] == redist->row_sizes[i]);
120 : }
121 144 : assert(matrix->ncols == redist->ncols);
122 6384 : for (int i = 0; i < matrix->ncols; i++) {
123 6240 : assert(matrix->col_sizes[i] == redist->col_sizes[i]);
124 : }
125 :
126 144 : assert(cp_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
127 144 : const cp_mpi_comm_t comm = redist->dist->comm;
128 144 : const int nranks = cp_mpi_comm_size(comm);
129 :
130 : // 1st pass: Compute send_count.
131 144 : int send_count[nranks];
132 144 : memset(send_count, 0, nranks * sizeof(int));
133 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
134 144 : dbm_shard_t *shard = &matrix->shards[ishard];
135 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
136 8792 : const dbm_block_t *blk = &shard->blocks[iblock];
137 8792 : const int row_size = matrix->row_sizes[blk->row];
138 8792 : const int col_size = matrix->col_sizes[blk->col];
139 8792 : const int block_size = row_size * col_size;
140 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
141 8792 : assert(0 <= rank && rank < nranks);
142 8792 : send_count[rank] += 2 + block_size;
143 : }
144 : }
145 :
146 : // 1st communication: Exchange counts.
147 144 : int recv_count[nranks];
148 144 : cp_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
149 :
150 : // Compute displacements and allocate data buffers.
151 144 : int send_displ[nranks + 1], recv_displ[nranks + 1];
152 144 : send_displ[0] = recv_displ[0] = 0;
153 432 : for (int irank = 1; irank <= nranks; irank++) {
154 288 : send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
155 288 : recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
156 : }
157 144 : const int total_send_count = send_displ[nranks];
158 144 : const int total_recv_count = recv_displ[nranks];
159 144 : double *data_send = cp_mpi_alloc_mem(total_send_count * sizeof(double));
160 144 : double *data_recv = cp_mpi_alloc_mem(total_recv_count * sizeof(double));
161 :
162 : // 2nd pass: Fill send_data.
163 144 : int send_data_positions[nranks];
164 144 : memcpy(send_data_positions, send_displ, nranks * sizeof(int));
165 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
166 144 : dbm_shard_t *shard = &matrix->shards[ishard];
167 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
168 8792 : const dbm_block_t *blk = &shard->blocks[iblock];
169 8792 : const double *blk_data = &shard->data[blk->offset];
170 8792 : const int row_size = matrix->row_sizes[blk->row];
171 8792 : const int col_size = matrix->col_sizes[blk->col];
172 8792 : const int block_size = row_size * col_size;
173 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
174 8792 : const int pos = send_data_positions[rank];
175 8792 : data_send[pos + 0] = blk->row; // send integers as doubles
176 8792 : data_send[pos + 1] = blk->col;
177 8792 : memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
178 8792 : send_data_positions[rank] += 2 + block_size;
179 : }
180 : }
181 432 : for (int irank = 0; irank < nranks; irank++) {
182 288 : assert(send_data_positions[irank] == send_displ[irank + 1]);
183 : }
184 :
185 : // 2nd communication: Exchange data.
186 144 : cp_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
187 : recv_count, recv_displ, comm);
188 144 : cp_mpi_free_mem(data_send);
189 :
190 : // 3rd pass: Unpack data.
191 144 : dbm_clear(redist);
192 144 : int recv_data_pos = 0;
193 8936 : while (recv_data_pos < total_recv_count) {
194 8792 : const int row = (int)data_recv[recv_data_pos + 0];
195 8792 : const int col = (int)data_recv[recv_data_pos + 1];
196 8792 : assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
197 8792 : assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
198 8792 : dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
199 8792 : const int row_size = matrix->row_sizes[row];
200 8792 : const int col_size = matrix->col_sizes[col];
201 8792 : const int block_size = row_size * col_size;
202 8792 : recv_data_pos += 2 + block_size;
203 : }
204 144 : assert(recv_data_pos == total_recv_count);
205 144 : cp_mpi_free_mem(data_recv);
206 144 : }
207 :
208 : /*******************************************************************************
209 : * \brief Looks up a block from given matrics. This routine is thread-safe.
210 : * If the block is not found then a null pointer is returned.
211 : * \author Ole Schuett
212 : ******************************************************************************/
213 21397733 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
214 : double **block, int *row_size, int *col_size) {
215 21397733 : assert(0 <= row && row < matrix->nrows);
216 21397733 : assert(0 <= col && col < matrix->ncols);
217 21397733 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
218 21397733 : *row_size = matrix->row_sizes[row];
219 21397733 : *col_size = matrix->col_sizes[col];
220 21397733 : *block = NULL;
221 :
222 21397733 : const int ishard = dbm_get_shard_index(matrix, row, col);
223 21397733 : const dbm_shard_t *shard = &matrix->shards[ishard];
224 21397733 : dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
225 21397733 : if (blk != NULL) {
226 19830669 : *block = &shard->data[blk->offset];
227 : }
228 21397733 : }
229 :
230 : /*******************************************************************************
231 : * \brief Adds a block to given matrix. This routine is thread-safe.
232 : * If block already exist then it gets overwritten (or summed).
233 : * \author Ole Schuett
234 : ******************************************************************************/
235 34062668 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
236 : const bool summation, const double *block) {
237 34062668 : assert(0 <= row && row < matrix->nrows);
238 34062668 : assert(0 <= col && col < matrix->ncols);
239 34062668 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
240 34062668 : const int row_size = matrix->row_sizes[row];
241 34062668 : const int col_size = matrix->col_sizes[col];
242 34062668 : const int block_size = row_size * col_size;
243 :
244 34062668 : const int ishard = dbm_get_shard_index(matrix, row, col);
245 34062668 : dbm_shard_t *shard = &matrix->shards[ishard];
246 34062668 : omp_set_lock(&shard->lock);
247 34062668 : dbm_block_t *blk =
248 34062668 : dbm_shard_get_or_allocate_block(shard, row, col, block_size);
249 34062668 : double *blk_data = &shard->data[blk->offset];
250 34062668 : if (summation) {
251 3678301 : assert(blk_data != NULL || 0 == block_size);
252 1292875636 : for (int i = 0; i < block_size; i++) {
253 1289197335 : blk_data[i] += block[i];
254 : }
255 30384367 : } else if (block_size != 0) {
256 30379268 : memcpy(blk_data, block, block_size * sizeof(double));
257 : }
258 34062668 : omp_unset_lock(&shard->lock);
259 34062668 : }
260 :
261 : /*******************************************************************************
262 : * \brief Remove all blocks from matrix, but does not release underlying memory.
263 : * \author Ole Schuett
264 : ******************************************************************************/
265 2048396 : void dbm_clear(dbm_matrix_t *matrix) {
266 2048396 : assert(omp_get_num_threads() == 1);
267 :
268 2048396 : #pragma omp parallel for DBM_OMP_SCHEDULE
269 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
270 : dbm_shard_t *shard = &matrix->shards[ishard];
271 : shard->nblocks = 0;
272 : shard->data_size = 0;
273 : shard->data_promised = 0;
274 : // Does not deallocate memory, hence data_allocated remains unchanged.
275 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
276 : }
277 2048396 : }
278 :
279 : /*******************************************************************************
280 : * \brief Removes all blocks from the matrix whose norm is below the threshold.
281 : * Blocks of size zero are always kept.
282 : * \author Ole Schuett
283 : ******************************************************************************/
284 607925 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
285 607925 : assert(omp_get_num_threads() == 1);
286 :
287 607925 : if (eps == 0.0) {
288 : return;
289 : }
290 562431 : const double eps2 = eps * eps;
291 :
292 562431 : #pragma omp parallel for DBM_OMP_SCHEDULE
293 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
294 : dbm_shard_t *shard = &matrix->shards[ishard];
295 : const int old_nblocks = shard->nblocks;
296 : shard->nblocks = 0;
297 : shard->data_promised = 0;
298 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
299 :
300 : for (int iblock = 0; iblock < old_nblocks; iblock++) {
301 : const dbm_block_t old_blk = shard->blocks[iblock];
302 : const double *old_blk_data = &shard->data[old_blk.offset];
303 : const int row_size = matrix->row_sizes[old_blk.row];
304 : const int col_size = matrix->col_sizes[old_blk.col];
305 : const int block_size = row_size * col_size;
306 : double norm = 0.0;
307 : for (int i = 0; i < block_size; i++) {
308 : const double value = old_blk_data[i];
309 : norm += value * value;
310 : if (eps2 <= norm) {
311 : break;
312 : }
313 : }
314 : // For historic reasons zero-sized blocks are never filtered.
315 : if (block_size > 0 && norm < eps2) {
316 : continue; // filter the block
317 : }
318 : // Re-create block.
319 : dbm_block_t *new_blk = dbm_shard_promise_new_block(
320 : shard, old_blk.row, old_blk.col, block_size);
321 : assert(new_blk->offset <= old_blk.offset);
322 : if (new_blk->offset != old_blk.offset) {
323 : // Using memmove because it can handle overlapping buffers.
324 : double *new_blk_data = &shard->data[new_blk->offset];
325 : memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
326 : }
327 : }
328 : shard->data_size = shard->data_promised;
329 : // TODO: Could call realloc to release excess memory.
330 : }
331 : }
332 :
333 : /*******************************************************************************
334 : * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
335 : * This routine must always be called within an OpenMP parallel region.
336 : * \author Ole Schuett
337 : ******************************************************************************/
338 1445667 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
339 : const int rows[], const int cols[]) {
340 1445667 : assert(omp_get_num_threads() == omp_get_max_threads() &&
341 : "Please call dbm_reserve_blocks within an OpenMP parallel region.");
342 1445667 : const int my_rank = matrix->dist->my_rank;
343 30386751 : for (int i = 0; i < nblocks; i++) {
344 28941084 : const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
345 28941084 : dbm_shard_t *shard = &matrix->shards[ishard];
346 28941084 : omp_set_lock(&shard->lock);
347 28941084 : assert(0 <= rows[i] && rows[i] < matrix->nrows);
348 28941084 : assert(0 <= cols[i] && cols[i] < matrix->ncols);
349 28941084 : assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
350 28941084 : const int row_size = matrix->row_sizes[rows[i]];
351 28941084 : const int col_size = matrix->col_sizes[cols[i]];
352 28941084 : const int block_size = row_size * col_size;
353 28941084 : dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
354 28941084 : omp_unset_lock(&shard->lock);
355 : }
356 1445667 : #pragma omp barrier
357 :
358 : #pragma omp for DBM_OMP_SCHEDULE
359 1445667 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
360 1445667 : dbm_shard_t *shard = &matrix->shards[ishard];
361 1445667 : dbm_shard_allocate_promised_blocks(shard);
362 : }
363 1445667 : }
364 :
365 : /*******************************************************************************
366 : * \brief Multiplies all entries in the given matrix by the given factor alpha.
367 : * \author Ole Schuett
368 : ******************************************************************************/
369 466328 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
370 466328 : assert(omp_get_num_threads() == 1);
371 466328 : if (alpha == 1.0) {
372 : return;
373 : }
374 355707 : if (alpha == 0.0) {
375 345003 : dbm_zero(matrix);
376 345003 : return;
377 : }
378 :
379 10704 : #pragma omp parallel for DBM_OMP_SCHEDULE
380 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
381 : dbm_shard_t *shard = &matrix->shards[ishard];
382 : for (int i = 0; i < shard->data_size; i++) {
383 : shard->data[i] *= alpha;
384 : }
385 : }
386 : }
387 :
388 : /*******************************************************************************
389 : * \brief Sets all blocks in the given matrix to zero.
390 : * \author Ole Schuett
391 : ******************************************************************************/
392 345003 : void dbm_zero(dbm_matrix_t *matrix) {
393 345003 : assert(omp_get_num_threads() == 1);
394 :
395 345003 : #pragma omp parallel for DBM_OMP_SCHEDULE
396 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
397 : dbm_shard_t *shard = &matrix->shards[ishard];
398 : if (shard->data != NULL) {
399 : memset(shard->data, 0, shard->data_size * sizeof(double));
400 : }
401 : }
402 345003 : }
403 :
404 : /*******************************************************************************
405 : * \brief Adds matrix_b to matrix_a.
406 : * \author Ole Schuett
407 : ******************************************************************************/
408 213222 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
409 213222 : assert(omp_get_num_threads() == 1);
410 213222 : assert(matrix_a->dist == matrix_b->dist);
411 :
412 213222 : #pragma omp parallel for DBM_OMP_SCHEDULE
413 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
414 : dbm_shard_t *shard_a = &matrix_a->shards[ishard];
415 : const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
416 : for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
417 : const dbm_block_t blk_b = shard_b->blocks[iblock];
418 : const int row_size = matrix_b->row_sizes[blk_b.row];
419 : const int col_size = matrix_b->col_sizes[blk_b.col];
420 : assert(row_size == matrix_a->row_sizes[blk_b.row]);
421 : assert(col_size == matrix_a->col_sizes[blk_b.col]);
422 : const int block_size = row_size * col_size;
423 : dbm_block_t *blk_a = dbm_shard_get_or_allocate_block(
424 : shard_a, blk_b.row, blk_b.col, block_size);
425 : double *data_a = &shard_a->data[blk_a->offset];
426 : const double *data_b = &shard_b->data[blk_b.offset];
427 : for (int i = 0; i < block_size; i++) {
428 : data_a[i] += data_b[i];
429 : }
430 : }
431 : }
432 213222 : }
433 :
434 : /*******************************************************************************
435 : * \brief Creates an iterator for the blocks of the given matrix.
436 : * The iteration order is not stable.
437 : * This routine must always be called within an OpenMP parallel region.
438 : * \author Ole Schuett
439 : ******************************************************************************/
440 3385070 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
441 3385070 : assert(omp_get_num_threads() == omp_get_max_threads() &&
442 : "Please call dbm_iterator_start within an OpenMP parallel region.");
443 3385070 : dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
444 3385070 : assert(iter != NULL);
445 3385070 : iter->matrix = matrix;
446 3385070 : iter->next_block = 0;
447 3385070 : iter->next_shard = omp_get_thread_num();
448 3923582 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
449 3385070 : matrix->shards[iter->next_shard].nblocks == 0) {
450 538512 : iter->next_shard += omp_get_num_threads();
451 : }
452 3385070 : assert(*iter_out == NULL);
453 3385070 : *iter_out = iter;
454 3385070 : }
455 :
456 : /*******************************************************************************
457 : * \brief Returns number of blocks the iterator will provide to calling thread.
458 : * \author Ole Schuett
459 : ******************************************************************************/
460 639233 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
461 639233 : int num_blocks = 0;
462 639233 : for (int ishard = omp_get_thread_num();
463 1278466 : ishard < dbm_get_num_shards(iter->matrix);
464 639233 : ishard += omp_get_num_threads()) {
465 639233 : num_blocks += iter->matrix->shards[ishard].nblocks;
466 : }
467 639233 : return num_blocks;
468 : }
469 :
470 : /*******************************************************************************
471 : * \brief Tests whether the given iterator has any block left.
472 : * \author Ole Schuett
473 : ******************************************************************************/
474 59763668 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
475 59763668 : return iter->next_shard < dbm_get_num_shards(iter->matrix);
476 : }
477 :
478 : /*******************************************************************************
479 : * \brief Returns the next block from the given iterator.
480 : * \author Ole Schuett
481 : ******************************************************************************/
482 66410153 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
483 : double **block, int *row_size, int *col_size) {
484 66410153 : const dbm_matrix_t *matrix = iter->matrix;
485 66410153 : assert(iter->next_shard < dbm_get_num_shards(matrix));
486 66410153 : const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
487 66410153 : assert(iter->next_block < shard->nblocks);
488 66410153 : dbm_block_t *blk = &shard->blocks[iter->next_block];
489 :
490 66410153 : *row = blk->row;
491 66410153 : *col = blk->col;
492 66410153 : *row_size = matrix->row_sizes[blk->row];
493 66410153 : *col_size = matrix->col_sizes[blk->col];
494 66410153 : *block = &shard->data[blk->offset];
495 :
496 66410153 : iter->next_block++;
497 66410153 : if (iter->next_block >= shard->nblocks) {
498 : // Advance to the next non-empty shard...
499 2846558 : iter->next_shard += omp_get_num_threads();
500 2846558 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
501 0 : matrix->shards[iter->next_shard].nblocks == 0) {
502 0 : iter->next_shard += omp_get_num_threads();
503 : }
504 2846558 : iter->next_block = 0; // ...and continue with its first block.
505 : }
506 66410153 : }
507 :
508 : /*******************************************************************************
509 : * \brief Releases the given iterator.
510 : * \author Ole Schuett
511 : ******************************************************************************/
512 3385070 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
513 :
514 : /*******************************************************************************
515 : * \brief Private routine to accumulate using Kahan's summation.
516 : * \author Hans Pabst
517 : ******************************************************************************/
518 14383468 : static double kahan_sum(double value, double *accumulator,
519 : double *compensation) {
520 14383468 : double r, c;
521 14383468 : assert(NULL != accumulator && NULL != compensation);
522 14383468 : c = value - *compensation;
523 14383468 : r = *accumulator + c;
524 14383468 : *compensation = (r - *accumulator) - c;
525 14383468 : *accumulator = r;
526 14383468 : return r;
527 : }
528 :
529 : /*******************************************************************************
530 : * \brief Computes a checksum of the given matrix.
531 : * \author Ole Schuett
532 : ******************************************************************************/
533 190 : double dbm_checksum(const dbm_matrix_t *matrix) {
534 190 : double checksum = 0.0, compensation = 0.0;
535 380 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
536 190 : const dbm_shard_t *shard = &matrix->shards[ishard];
537 14383658 : for (int i = 0; i < shard->data_size; i++) {
538 14383468 : const double value = shard->data[i];
539 14383468 : kahan_sum(value * value, &checksum, &compensation);
540 : }
541 : }
542 190 : cp_mpi_sum_double(&checksum, 1, matrix->dist->comm);
543 190 : return checksum;
544 : }
545 :
546 : /*******************************************************************************
547 : * \brief Returns the largest absolute value of all matrix elements.
548 : * \author Ole Schuett
549 : ******************************************************************************/
550 48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
551 48 : double maxabs = 0.0;
552 96 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
553 48 : const dbm_shard_t *shard = &matrix->shards[ishard];
554 1599050 : for (int i = 0; i < shard->data_size; i++) {
555 1599002 : maxabs = fmax(maxabs, fabs(shard->data[i]));
556 : }
557 : }
558 48 : cp_mpi_max_double(&maxabs, 1, matrix->dist->comm);
559 48 : return maxabs;
560 : }
561 :
562 : /*******************************************************************************
563 : * \brief Calculates maximum relative difference between matrix_a and matrix_b.
564 : * \author Hans Pabst
565 : ******************************************************************************/
566 0 : double dbm_maxeps(const dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
567 0 : const int num_shards = dbm_get_num_shards(matrix_a);
568 0 : double epsilon = 0;
569 :
570 0 : assert(omp_get_num_threads() == 1);
571 0 : assert(matrix_a->dist == matrix_b->dist);
572 0 : assert(num_shards == dbm_get_num_shards(matrix_b));
573 :
574 0 : #pragma omp parallel for DBM_OMP_SCHEDULE
575 : for (int ishard = 0; ishard < num_shards; ++ishard) {
576 : const dbm_shard_t *const shard_a = &matrix_a->shards[ishard];
577 : const dbm_shard_t *const shard_b = &matrix_b->shards[ishard];
578 : assert(shard_a->nblocks == shard_b->nblocks);
579 : for (int iblock = 0; iblock < shard_a->nblocks; ++iblock) {
580 : const dbm_block_t *const blk_a = &shard_a->blocks[iblock];
581 : const dbm_block_t *const blk_b =
582 : dbm_shard_lookup(shard_b, blk_a->row, blk_a->col);
583 : const int row_size = matrix_a->row_sizes[blk_a->row];
584 : const int col_size = matrix_a->col_sizes[blk_a->col];
585 : assert(row_size == matrix_b->row_sizes[blk_b->row]);
586 : assert(col_size == matrix_b->col_sizes[blk_b->col]);
587 : const double *const data_a = &shard_a->data[blk_a->offset];
588 : const double *const data_b = &shard_b->data[blk_b->offset];
589 : const int block_size = row_size * col_size;
590 : for (int i = 0; i < block_size; ++i) {
591 : const double d = data_a[i] - data_b[i];
592 : const double e = fabs(0 != data_a[i] ? (d / data_a[i]) : d);
593 : if (epsilon < e) {
594 : epsilon = e;
595 : }
596 : }
597 : }
598 : }
599 :
600 0 : cp_mpi_max_double(&epsilon, 1, matrix_a->dist->comm);
601 0 : return epsilon;
602 : }
603 :
604 : /*******************************************************************************
605 : * \brief Returns the name of the matrix of the given matrix.
606 : * \author Ole Schuett
607 : ******************************************************************************/
608 1787429 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
609 :
610 : /*******************************************************************************
611 : * \brief Returns the number of local Non-Zero Elements of the given matrix.
612 : * \author Ole Schuett
613 : ******************************************************************************/
614 1885694 : int dbm_get_nze(const dbm_matrix_t *matrix) {
615 1885694 : int nze = 0;
616 3771388 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
617 1885694 : nze += matrix->shards[ishard].data_size;
618 : }
619 1885694 : return nze;
620 : }
621 :
622 : /*******************************************************************************
623 : * \brief Returns the number of local blocks of the given matrix.
624 : * \author Ole Schuett
625 : ******************************************************************************/
626 1029546 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
627 1029546 : int nblocks = 0;
628 2059092 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
629 1029546 : nblocks += matrix->shards[ishard].nblocks;
630 : }
631 1029546 : return nblocks;
632 : }
633 :
634 : /*******************************************************************************
635 : * \brief Returns the row block sizes of the given matrix.
636 : * \author Ole Schuett
637 : ******************************************************************************/
638 3883139 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
639 : const int **row_sizes) {
640 3883139 : *nrows = matrix->nrows;
641 3883139 : *row_sizes = matrix->row_sizes;
642 3883139 : }
643 :
644 : /*******************************************************************************
645 : * \brief Returns the column block sizes of the given matrix.
646 : * \author Ole Schuett
647 : ******************************************************************************/
648 2643885 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
649 : const int **col_sizes) {
650 2643885 : *ncols = matrix->ncols;
651 2643885 : *col_sizes = matrix->col_sizes;
652 2643885 : }
653 :
654 : /*******************************************************************************
655 : * \brief Returns the local row block sizes of the given matrix.
656 : * \author Ole Schuett
657 : ******************************************************************************/
658 271476 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
659 : const int **local_rows) {
660 271476 : *nlocal_rows = matrix->dist->rows.nlocals;
661 271476 : *local_rows = matrix->dist->rows.local_indicies;
662 271476 : }
663 :
664 : /*******************************************************************************
665 : * \brief Returns the local column block sizes of the given matrix.
666 : * \author Ole Schuett
667 : ******************************************************************************/
668 125400 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
669 : const int **local_cols) {
670 125400 : *nlocal_cols = matrix->dist->cols.nlocals;
671 125400 : *local_cols = matrix->dist->cols.local_indicies;
672 125400 : }
673 :
674 : /*******************************************************************************
675 : * \brief Returns the MPI rank on which the given block should be stored.
676 : * \author Ole Schuett
677 : ******************************************************************************/
678 92208768 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
679 : const int col) {
680 92208768 : return dbm_distribution_stored_coords(matrix->dist, row, col);
681 : }
682 :
683 : /*******************************************************************************
684 : * \brief Returns the distribution of the given matrix.
685 : * \author Ole Schuett
686 : ******************************************************************************/
687 1270981 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
688 1270981 : return matrix->dist;
689 : }
690 :
691 : // EOF
|