Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 : #include "dbm_matrix.h"
8 : #include "dbm_hyperparams.h"
9 :
10 : #include <assert.h>
11 : #include <math.h>
12 : #include <omp.h>
13 : #include <stdbool.h>
14 : #include <stddef.h>
15 : #include <stdlib.h>
16 : #include <string.h>
17 :
18 : /*******************************************************************************
19 : * \brief Creates a new matrix.
20 : * \author Ole Schuett
21 : ******************************************************************************/
22 1959766 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
23 : const char name[], const int nrows, const int ncols,
24 : const int row_sizes[nrows], const int col_sizes[ncols]) {
25 1959766 : assert(omp_get_num_threads() == 1);
26 :
27 1959766 : dbm_matrix_t *const matrix = calloc(1, sizeof(dbm_matrix_t));
28 :
29 1959766 : assert(dist->rows.length == nrows);
30 1959766 : assert(dist->cols.length == ncols);
31 1959766 : dbm_distribution_hold(dist);
32 1959766 : matrix->dist = dist;
33 :
34 1959766 : size_t size = (strlen(name) + 1) * sizeof(char);
35 1959766 : matrix->name = malloc(size);
36 1959766 : assert(matrix->name != NULL && name != NULL);
37 1959766 : memcpy(matrix->name, name, size);
38 :
39 1959766 : matrix->nrows = nrows;
40 1959766 : matrix->ncols = ncols;
41 :
42 1959766 : size = nrows * sizeof(int);
43 1959766 : matrix->row_sizes = malloc(size);
44 1959766 : assert(matrix->row_sizes != NULL || size == 0);
45 1959766 : if (size != 0) {
46 1959206 : memcpy(matrix->row_sizes, row_sizes, size);
47 : }
48 :
49 1959766 : size = ncols * sizeof(int);
50 1959766 : matrix->col_sizes = malloc(size);
51 1959766 : assert(matrix->col_sizes != NULL || size == 0);
52 1959766 : if (size != 0) {
53 1955602 : memcpy(matrix->col_sizes, col_sizes, size);
54 : }
55 :
56 1959766 : const int num_shards = dbm_get_num_shards(matrix);
57 1959766 : matrix->shards = malloc(num_shards * sizeof(dbm_shard_t));
58 1959766 : assert(matrix->shards != NULL || num_shards == 0);
59 1959766 : #pragma omp parallel for
60 : for (int ishard = 0; ishard < num_shards; ishard++) {
61 : dbm_shard_init(&matrix->shards[ishard]);
62 : }
63 :
64 1959766 : assert(*matrix_out == NULL);
65 1959766 : *matrix_out = matrix;
66 1959766 : }
67 :
68 : /*******************************************************************************
69 : * \brief Releases a matrix and all its ressources.
70 : * \author Ole Schuett
71 : ******************************************************************************/
72 1959766 : void dbm_release(dbm_matrix_t *matrix) {
73 1959766 : assert(omp_get_num_threads() == 1);
74 3919532 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
75 1959766 : dbm_shard_release(&matrix->shards[ishard]);
76 : }
77 1959766 : dbm_distribution_release(matrix->dist);
78 1959766 : free(matrix->row_sizes);
79 1959766 : free(matrix->col_sizes);
80 1959766 : free(matrix->shards);
81 1959766 : free(matrix->name);
82 1959766 : free(matrix);
83 1959766 : }
84 :
85 : /*******************************************************************************
86 : * \brief Copies content of matrix_b into matrix_a.
87 : * Matrices must have the same row/col block sizes and distribution.
88 : * \author Ole Schuett
89 : ******************************************************************************/
90 489662 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
91 489662 : assert(omp_get_num_threads() == 1);
92 :
93 489662 : assert(matrix_b->nrows == matrix_a->nrows);
94 5811168 : for (int i = 0; i < matrix_b->nrows; i++) {
95 5321506 : assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
96 : }
97 489662 : assert(matrix_b->ncols == matrix_a->ncols);
98 29933781 : for (int i = 0; i < matrix_b->ncols; i++) {
99 29444119 : assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
100 : }
101 :
102 489662 : assert(matrix_a->dist == matrix_b->dist);
103 :
104 489662 : #pragma omp parallel for DBM_OMP_SCHEDULE
105 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
106 : dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
107 : }
108 489662 : }
109 :
110 : /*******************************************************************************
111 : * \brief Copies content of matrix_b into matrix_a.
112 : * Matrices may have different distributions.
113 : * \author Ole Schuett
114 : ******************************************************************************/
115 144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
116 144 : assert(omp_get_num_threads() == 1);
117 144 : assert(matrix->nrows == redist->nrows);
118 6384 : for (int i = 0; i < matrix->nrows; i++) {
119 6240 : assert(matrix->row_sizes[i] == redist->row_sizes[i]);
120 : }
121 144 : assert(matrix->ncols == redist->ncols);
122 6384 : for (int i = 0; i < matrix->ncols; i++) {
123 6240 : assert(matrix->col_sizes[i] == redist->col_sizes[i]);
124 : }
125 :
126 144 : assert(cp_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
127 144 : const cp_mpi_comm_t comm = redist->dist->comm;
128 144 : const int nranks = cp_mpi_comm_size(comm);
129 :
130 : // 1st pass: Compute send_count.
131 144 : int send_count[nranks];
132 144 : memset(send_count, 0, nranks * sizeof(int));
133 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
134 144 : const dbm_shard_t *const shard = &matrix->shards[ishard];
135 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
136 8792 : const dbm_block_t *const blk = &shard->blocks[iblock];
137 8792 : const int row_size = matrix->row_sizes[blk->row];
138 8792 : const int col_size = matrix->col_sizes[blk->col];
139 8792 : const int block_size = row_size * col_size;
140 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
141 8792 : assert(0 <= rank && rank < nranks);
142 8792 : send_count[rank] += 2 + block_size;
143 : }
144 : }
145 :
146 : // 1st communication: Exchange counts.
147 144 : int recv_count[nranks];
148 144 : cp_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
149 :
150 : // Compute displacements and allocate data buffers.
151 144 : int send_displ[nranks + 1], recv_displ[nranks + 1];
152 144 : send_displ[0] = recv_displ[0] = 0;
153 432 : for (int irank = 1; irank <= nranks; irank++) {
154 288 : send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
155 288 : recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
156 : }
157 144 : const int total_send_count = send_displ[nranks];
158 144 : const int total_recv_count = recv_displ[nranks];
159 144 : double *const data_send = cp_mpi_alloc_mem(total_send_count * sizeof(double));
160 144 : double *const data_recv = cp_mpi_alloc_mem(total_recv_count * sizeof(double));
161 :
162 : // 2nd pass: Fill send_data.
163 144 : int send_data_positions[nranks];
164 144 : memcpy(send_data_positions, send_displ, nranks * sizeof(int));
165 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
166 144 : const dbm_shard_t *const shard = &matrix->shards[ishard];
167 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
168 8792 : const dbm_block_t *const blk = &shard->blocks[iblock];
169 8792 : const double *const blk_data = &shard->data[blk->offset];
170 8792 : const int row_size = matrix->row_sizes[blk->row];
171 8792 : const int col_size = matrix->col_sizes[blk->col];
172 8792 : const int block_size = row_size * col_size;
173 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
174 8792 : const int pos = send_data_positions[rank];
175 8792 : data_send[pos + 0] = blk->row; // send integers as doubles
176 8792 : data_send[pos + 1] = blk->col;
177 8792 : memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
178 8792 : send_data_positions[rank] += 2 + block_size;
179 : }
180 : }
181 432 : for (int irank = 0; irank < nranks; irank++) {
182 288 : assert(send_data_positions[irank] == send_displ[irank + 1]);
183 : }
184 :
185 : // 2nd communication: Exchange data.
186 144 : cp_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
187 : recv_count, recv_displ, comm);
188 144 : cp_mpi_free_mem(data_send);
189 :
190 : // 3rd pass: Unpack data.
191 144 : dbm_clear(redist);
192 144 : int recv_data_pos = 0;
193 8936 : while (recv_data_pos < total_recv_count) {
194 8792 : const int row = (int)data_recv[recv_data_pos + 0];
195 8792 : const int col = (int)data_recv[recv_data_pos + 1];
196 8792 : assert(data_recv[recv_data_pos + 0] == (double)row);
197 8792 : assert(data_recv[recv_data_pos + 1] == (double)col);
198 8792 : dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
199 8792 : const int row_size = matrix->row_sizes[row];
200 8792 : const int col_size = matrix->col_sizes[col];
201 8792 : const int block_size = row_size * col_size;
202 8792 : recv_data_pos += 2 + block_size;
203 : }
204 144 : assert(recv_data_pos == total_recv_count);
205 144 : cp_mpi_free_mem(data_recv);
206 144 : }
207 :
208 : /*******************************************************************************
209 : * \brief Looks up a block from given matrics. This routine is thread-safe.
210 : * If the block is not found then a null pointer is returned.
211 : * \author Ole Schuett
212 : ******************************************************************************/
213 23127892 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
214 : double **block, int *row_size, int *col_size) {
215 23127892 : assert(0 <= row && row < matrix->nrows);
216 23127892 : assert(0 <= col && col < matrix->ncols);
217 23127892 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
218 23127892 : *row_size = matrix->row_sizes[row];
219 23127892 : *col_size = matrix->col_sizes[col];
220 23127892 : *block = NULL;
221 :
222 23127892 : const int ishard = dbm_get_shard_index(matrix, row, col);
223 23127892 : const dbm_shard_t *const shard = &matrix->shards[ishard];
224 23127892 : const dbm_block_t *const blk = dbm_shard_lookup(shard, row, col);
225 23127892 : if (blk != NULL) {
226 21493951 : *block = &shard->data[blk->offset];
227 : }
228 23127892 : }
229 :
230 : /*******************************************************************************
231 : * \brief Adds a block to given matrix. This routine is thread-safe.
232 : * If block already exist then it gets overwritten (or summed).
233 : * \author Ole Schuett
234 : ******************************************************************************/
235 36873409 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
236 : const bool summation, const double *block) {
237 36873409 : assert(0 <= row && row < matrix->nrows);
238 36873409 : assert(0 <= col && col < matrix->ncols);
239 36873409 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
240 36873409 : const int row_size = matrix->row_sizes[row];
241 36873409 : const int col_size = matrix->col_sizes[col];
242 36873409 : const int block_size = row_size * col_size;
243 :
244 36873409 : const int ishard = dbm_get_shard_index(matrix, row, col);
245 36873409 : dbm_shard_t *const shard = &matrix->shards[ishard];
246 36873409 : omp_set_lock(&shard->lock);
247 36873409 : const dbm_block_t *const blk =
248 36873409 : dbm_shard_get_or_allocate_block(shard, row, col, block_size);
249 36873409 : double *const blk_data = &shard->data[blk->offset];
250 36873409 : if (summation) {
251 3808163 : assert(blk_data != NULL || 0 == block_size);
252 1307511693 : for (int i = 0; i < block_size; i++) {
253 1303703530 : blk_data[i] += block[i];
254 : }
255 33065246 : } else if (block_size != 0) {
256 33060147 : memcpy(blk_data, block, block_size * sizeof(double));
257 : }
258 36873409 : omp_unset_lock(&shard->lock);
259 36873409 : }
260 :
261 : /*******************************************************************************
262 : * \brief Remove all blocks from matrix, but does not release underlying memory.
263 : * \author Ole Schuett
264 : ******************************************************************************/
265 2331774 : void dbm_clear(dbm_matrix_t *matrix) {
266 2331774 : assert(omp_get_num_threads() == 1);
267 :
268 2331774 : #pragma omp parallel for DBM_OMP_SCHEDULE
269 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
270 : dbm_shard_t *const shard = &matrix->shards[ishard];
271 : shard->data_promised = shard->data_size = shard->nblocks = 0;
272 : // Does not deallocate memory, hence data_allocated remains unchanged.
273 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
274 : }
275 2331774 : }
276 :
277 : /*******************************************************************************
278 : * \brief Removes all blocks from the matrix whose norm is below the threshold.
279 : * Blocks of size zero are always kept.
280 : * \author Ole Schuett
281 : ******************************************************************************/
282 692336 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
283 692336 : assert(omp_get_num_threads() == 1);
284 :
285 692336 : if (eps == 0.0) {
286 : return;
287 : }
288 640582 : const double eps2 = eps * eps;
289 :
290 640582 : #pragma omp parallel for DBM_OMP_SCHEDULE
291 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
292 : dbm_shard_t *const shard = &matrix->shards[ishard];
293 : const int old_nblocks = shard->nblocks;
294 : shard->nblocks = shard->data_promised = 0;
295 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
296 :
297 : for (int iblock = 0; iblock < old_nblocks; iblock++) {
298 : const dbm_block_t old_blk = shard->blocks[iblock];
299 : const double *old_blk_data = &shard->data[old_blk.offset];
300 : const int row_size = matrix->row_sizes[old_blk.row];
301 : const int col_size = matrix->col_sizes[old_blk.col];
302 : const int block_size = row_size * col_size;
303 : double norm = 0.0;
304 : for (int i = 0; i < block_size; i++) {
305 : const double value = old_blk_data[i];
306 : norm += value * value;
307 : if (eps2 <= norm) {
308 : break;
309 : }
310 : }
311 : // For historic reasons zero-sized blocks are never filtered.
312 : if (block_size > 0 && norm < eps2) {
313 : continue; // filter the block
314 : }
315 : // Re-create block.
316 : dbm_block_t *const new_blk = dbm_shard_promise_new_block(
317 : shard, old_blk.row, old_blk.col, block_size);
318 : assert(new_blk->offset <= old_blk.offset);
319 : if (new_blk->offset != old_blk.offset) {
320 : // Using memmove because it can handle overlapping buffers.
321 : double *new_blk_data = &shard->data[new_blk->offset];
322 : memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
323 : }
324 : }
325 : shard->data_size = shard->data_promised;
326 : // TODO: Could call realloc to release excess memory.
327 : }
328 : }
329 :
330 : /*******************************************************************************
331 : * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
332 : * This routine must always be called within an OpenMP parallel region.
333 : * \author Ole Schuett
334 : ******************************************************************************/
335 1630787 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
336 : const int rows[], const int cols[]) {
337 1630787 : assert(omp_get_num_threads() == omp_get_max_threads() &&
338 : "Please call dbm_reserve_blocks within an OpenMP parallel region.");
339 1630787 : const int my_rank = matrix->dist->my_rank;
340 :
341 32739773 : for (int i = 0; i < nblocks; i++) {
342 31108986 : const int row = rows[i], col = cols[i];
343 31108986 : assert(0 <= row && row < matrix->nrows);
344 31108986 : assert(0 <= col && col < matrix->ncols);
345 31108986 : assert(dbm_get_stored_coordinates(matrix, row, col) == my_rank);
346 31108986 : const int ishard = dbm_get_shard_index(matrix, row, col);
347 31108986 : dbm_shard_t *const shard = &matrix->shards[ishard];
348 31108986 : const int row_size = matrix->row_sizes[row];
349 31108986 : const int col_size = matrix->col_sizes[col];
350 31108986 : const int block_size = row_size * col_size;
351 31108986 : omp_set_lock(&shard->lock);
352 31108986 : dbm_shard_get_or_promise_block(shard, row, col, block_size);
353 31108986 : omp_unset_lock(&shard->lock);
354 : }
355 1630787 : #pragma omp barrier
356 :
357 : #pragma omp for DBM_OMP_SCHEDULE
358 1630787 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
359 1630787 : dbm_shard_t *const shard = &matrix->shards[ishard];
360 1630787 : dbm_shard_allocate_promised_blocks(shard);
361 : }
362 1630787 : }
363 :
364 : /*******************************************************************************
365 : * \brief Multiplies all entries in the given matrix by the given factor alpha.
366 : * \author Ole Schuett
367 : ******************************************************************************/
368 532593 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
369 532593 : assert(omp_get_num_threads() == 1);
370 532593 : if (alpha == 1.0) {
371 : return;
372 : }
373 407389 : if (alpha == 0.0) {
374 395517 : dbm_zero(matrix);
375 395517 : return;
376 : }
377 :
378 11872 : #pragma omp parallel for DBM_OMP_SCHEDULE
379 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
380 : dbm_shard_t *const shard = &matrix->shards[ishard];
381 : for (int i = 0; i < shard->data_size; i++) {
382 : shard->data[i] *= alpha;
383 : }
384 : }
385 : }
386 :
387 : /*******************************************************************************
388 : * \brief Sets all blocks in the given matrix to zero.
389 : * \author Ole Schuett
390 : ******************************************************************************/
391 395517 : void dbm_zero(dbm_matrix_t *matrix) {
392 395517 : assert(omp_get_num_threads() == 1);
393 :
394 395517 : #pragma omp parallel for DBM_OMP_SCHEDULE
395 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
396 : dbm_shard_t *const shard = &matrix->shards[ishard];
397 : if (shard->data != NULL) {
398 : memset(shard->data, 0, shard->data_size * sizeof(double));
399 : }
400 : }
401 395517 : }
402 :
403 : /*******************************************************************************
404 : * \brief Adds matrix_b to matrix_a.
405 : * \author Ole Schuett
406 : ******************************************************************************/
407 242460 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
408 242460 : assert(omp_get_num_threads() == 1);
409 242460 : assert(matrix_a->dist == matrix_b->dist);
410 :
411 242460 : #pragma omp parallel for DBM_OMP_SCHEDULE
412 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
413 : dbm_shard_t *const shard_a = &matrix_a->shards[ishard];
414 : const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
415 : for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
416 : const dbm_block_t blk_b = shard_b->blocks[iblock];
417 : const int row_size = matrix_b->row_sizes[blk_b.row];
418 : const int col_size = matrix_b->col_sizes[blk_b.col];
419 : assert(row_size == matrix_a->row_sizes[blk_b.row]);
420 : assert(col_size == matrix_a->col_sizes[blk_b.col]);
421 : const int block_size = row_size * col_size;
422 : const dbm_block_t *const blk_a = dbm_shard_get_or_allocate_block(
423 : shard_a, blk_b.row, blk_b.col, block_size);
424 : double *const data_a = &shard_a->data[blk_a->offset];
425 : const double *const data_b = &shard_b->data[blk_b.offset];
426 : for (int i = 0; i < block_size; i++) {
427 : data_a[i] += data_b[i];
428 : }
429 : }
430 : }
431 242460 : }
432 :
433 : /*******************************************************************************
434 : * \brief Creates an iterator for the blocks of the given matrix.
435 : * The iteration order is not stable.
436 : * This routine must always be called within an OpenMP parallel region.
437 : * \author Ole Schuett
438 : ******************************************************************************/
439 3868855 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
440 3868855 : assert(omp_get_num_threads() == omp_get_max_threads() &&
441 : "Please call dbm_iterator_start within an OpenMP parallel region.");
442 3868855 : dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
443 3868855 : assert(iter != NULL);
444 3868855 : iter->matrix = matrix;
445 3868855 : iter->next_block = 0;
446 3868855 : iter->next_shard = omp_get_thread_num();
447 4500302 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
448 3868855 : matrix->shards[iter->next_shard].nblocks == 0) {
449 631447 : iter->next_shard += omp_get_num_threads();
450 : }
451 3868855 : assert(*iter_out == NULL);
452 3868855 : *iter_out = iter;
453 3868855 : }
454 :
455 : /*******************************************************************************
456 : * \brief Returns number of blocks the iterator will provide to calling thread.
457 : * \author Ole Schuett
458 : ******************************************************************************/
459 717096 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
460 717096 : int num_blocks = 0;
461 717096 : for (int ishard = omp_get_thread_num();
462 1434192 : ishard < dbm_get_num_shards(iter->matrix);
463 717096 : ishard += omp_get_num_threads()) {
464 717096 : num_blocks += iter->matrix->shards[ishard].nblocks;
465 : }
466 717096 : return num_blocks;
467 : }
468 :
469 : /*******************************************************************************
470 : * \brief Tests whether the given iterator has any block left.
471 : * \author Ole Schuett
472 : ******************************************************************************/
473 64949272 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
474 64949272 : return iter->next_shard < dbm_get_num_shards(iter->matrix);
475 : }
476 :
477 : /*******************************************************************************
478 : * \brief Returns the next block from the given iterator.
479 : * \author Ole Schuett
480 : ******************************************************************************/
481 72083322 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
482 : double **block, int *row_size, int *col_size) {
483 72083322 : const dbm_matrix_t *matrix = iter->matrix;
484 72083322 : assert(iter->next_shard < dbm_get_num_shards(matrix));
485 72083322 : const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
486 72083322 : assert(iter->next_block < shard->nblocks);
487 72083322 : dbm_block_t *blk = &shard->blocks[iter->next_block];
488 :
489 72083322 : *row = blk->row;
490 72083322 : *col = blk->col;
491 72083322 : *row_size = matrix->row_sizes[blk->row];
492 72083322 : *col_size = matrix->col_sizes[blk->col];
493 72083322 : *block = &shard->data[blk->offset];
494 :
495 72083322 : iter->next_block++;
496 72083322 : if (iter->next_block >= shard->nblocks) {
497 : // Advance to the next non-empty shard...
498 3237408 : iter->next_shard += omp_get_num_threads();
499 3237408 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
500 0 : matrix->shards[iter->next_shard].nblocks == 0) {
501 0 : iter->next_shard += omp_get_num_threads();
502 : }
503 3237408 : iter->next_block = 0; // ...and continue with its first block.
504 : }
505 72083322 : }
506 :
507 : /*******************************************************************************
508 : * \brief Releases the given iterator.
509 : * \author Ole Schuett
510 : ******************************************************************************/
511 3868855 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
512 :
513 : /*******************************************************************************
514 : * \brief Private routine to accumulate using Kahan's summation.
515 : * \author Hans Pabst
516 : ******************************************************************************/
517 14383468 : static double kahan_sum(double value, double *accumulator,
518 : double *compensation) {
519 14383468 : double r, c;
520 14383468 : assert(NULL != accumulator && NULL != compensation);
521 14383468 : c = value - *compensation;
522 14383468 : r = *accumulator + c;
523 14383468 : *compensation = (r - *accumulator) - c;
524 14383468 : *accumulator = r;
525 14383468 : return r;
526 : }
527 :
528 : /*******************************************************************************
529 : * \brief Computes a checksum of the given matrix.
530 : * \author Ole Schuett
531 : ******************************************************************************/
532 190 : double dbm_checksum(const dbm_matrix_t *matrix) {
533 190 : double checksum = 0.0, compensation = 0.0;
534 380 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
535 190 : const dbm_shard_t *shard = &matrix->shards[ishard];
536 14383658 : for (int i = 0; i < shard->data_size; i++) {
537 14383468 : const double value = shard->data[i];
538 14383468 : kahan_sum(value * value, &checksum, &compensation);
539 : }
540 : }
541 190 : cp_mpi_sum_double(&checksum, 1, matrix->dist->comm);
542 190 : return checksum;
543 : }
544 :
545 : /*******************************************************************************
546 : * \brief Returns the largest absolute value of all matrix elements.
547 : * \author Ole Schuett
548 : ******************************************************************************/
549 48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
550 48 : double maxabs = 0.0;
551 96 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
552 48 : const dbm_shard_t *shard = &matrix->shards[ishard];
553 1599050 : for (int i = 0; i < shard->data_size; i++) {
554 1599002 : maxabs = fmax(maxabs, fabs(shard->data[i]));
555 : }
556 : }
557 48 : cp_mpi_max_double(&maxabs, 1, matrix->dist->comm);
558 48 : return maxabs;
559 : }
560 :
561 : /*******************************************************************************
562 : * \brief Calculates maximum relative difference between matrix_a and matrix_b.
563 : * \author Hans Pabst
564 : ******************************************************************************/
565 0 : double dbm_maxeps(const dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
566 0 : const int num_shards = dbm_get_num_shards(matrix_a);
567 0 : double epsilon = 0;
568 :
569 0 : assert(omp_get_num_threads() == 1);
570 0 : assert(matrix_a->dist == matrix_b->dist);
571 0 : assert(num_shards == dbm_get_num_shards(matrix_b));
572 :
573 : // Handles asymmetric sparsity: blocks in A but not B are compared against
574 : // zero, and blocks in B but not A are checked in a second pass. The
575 : // reduction(max) avoids the previous data race on the shared epsilon.
576 0 : #pragma omp parallel for reduction(max : epsilon) DBM_OMP_SCHEDULE
577 : for (int ishard = 0; ishard < num_shards; ++ishard) {
578 : const dbm_shard_t *const shard_a = &matrix_a->shards[ishard];
579 : const dbm_shard_t *const shard_b = &matrix_b->shards[ishard];
580 : for (int iblock = 0; iblock < shard_a->nblocks; ++iblock) {
581 : const dbm_block_t *const blk_a = &shard_a->blocks[iblock];
582 : const dbm_block_t *const blk_b =
583 : dbm_shard_lookup(shard_b, blk_a->row, blk_a->col);
584 : const int row_size = matrix_a->row_sizes[blk_a->row];
585 : const int col_size = matrix_a->col_sizes[blk_a->col];
586 : assert(NULL == blk_b || row_size == matrix_b->row_sizes[blk_b->row]);
587 : assert(NULL == blk_b || col_size == matrix_b->col_sizes[blk_b->col]);
588 : const double *const data_a = &shard_a->data[blk_a->offset];
589 : const double *const data_b =
590 : (NULL == blk_b) ? NULL : &shard_b->data[blk_b->offset];
591 : const int block_size = row_size * col_size;
592 : for (int i = 0; i < block_size; ++i) {
593 : const double d = data_a[i] - (NULL == data_b ? 0.0 : data_b[i]);
594 : const double e = fabs(0 != data_a[i] ? (d / data_a[i]) : d);
595 : if (epsilon < e) {
596 : epsilon = e;
597 : }
598 : }
599 : }
600 : // Second pass: blocks present in B but absent from A (treat A as zero).
601 : for (int iblock = 0; iblock < shard_b->nblocks; ++iblock) {
602 : const dbm_block_t *const blk_b = &shard_b->blocks[iblock];
603 : if (NULL != dbm_shard_lookup(shard_a, blk_b->row, blk_b->col)) {
604 : continue;
605 : }
606 : const int row_size = matrix_b->row_sizes[blk_b->row];
607 : const int col_size = matrix_b->col_sizes[blk_b->col];
608 : const double *const data_b = &shard_b->data[blk_b->offset];
609 : const int block_size = row_size * col_size;
610 : for (int i = 0; i < block_size; ++i) {
611 : const double e = fabs(data_b[i]);
612 : if (epsilon < e) {
613 : epsilon = e;
614 : }
615 : }
616 : }
617 : }
618 :
619 0 : cp_mpi_max_double(&epsilon, 1, matrix_a->dist->comm);
620 0 : return epsilon;
621 : }
622 :
623 : /*******************************************************************************
624 : * \brief Returns the name of the matrix of the given matrix.
625 : * \author Ole Schuett
626 : ******************************************************************************/
627 2016589 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
628 :
629 : /*******************************************************************************
630 : * \brief Returns the number of local Non-Zero Elements of the given matrix.
631 : * \author Ole Schuett
632 : ******************************************************************************/
633 2059255 : int dbm_get_nze(const dbm_matrix_t *matrix) {
634 2059255 : int nze = 0;
635 4118510 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
636 2059255 : nze += matrix->shards[ishard].data_size;
637 : }
638 2059255 : return nze;
639 : }
640 :
641 : /*******************************************************************************
642 : * \brief Returns the number of local blocks of the given matrix.
643 : * \author Ole Schuett
644 : ******************************************************************************/
645 1080392 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
646 1080392 : int nblocks = 0;
647 2160784 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
648 1080392 : nblocks += matrix->shards[ishard].nblocks;
649 : }
650 1080392 : return nblocks;
651 : }
652 :
653 : /*******************************************************************************
654 : * \brief Returns the row block sizes of the given matrix.
655 : * \author Ole Schuett
656 : ******************************************************************************/
657 4384814 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
658 : const int **row_sizes) {
659 4384814 : *nrows = matrix->nrows;
660 4384814 : *row_sizes = matrix->row_sizes;
661 4384814 : }
662 :
663 : /*******************************************************************************
664 : * \brief Returns the column block sizes of the given matrix.
665 : * \author Ole Schuett
666 : ******************************************************************************/
667 2917120 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
668 : const int **col_sizes) {
669 2917120 : *ncols = matrix->ncols;
670 2917120 : *col_sizes = matrix->col_sizes;
671 2917120 : }
672 :
673 : /*******************************************************************************
674 : * \brief Returns the local row block sizes of the given matrix.
675 : * \author Ole Schuett
676 : ******************************************************************************/
677 309492 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
678 : const int **local_rows) {
679 309492 : *nlocal_rows = matrix->dist->rows.nlocals;
680 309492 : *local_rows = matrix->dist->rows.local_indicies;
681 309492 : }
682 :
683 : /*******************************************************************************
684 : * \brief Returns the local column block sizes of the given matrix.
685 : * \author Ole Schuett
686 : ******************************************************************************/
687 138352 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
688 : const int **local_cols) {
689 138352 : *nlocal_cols = matrix->dist->cols.nlocals;
690 138352 : *local_cols = matrix->dist->cols.local_indicies;
691 138352 : }
692 :
693 : /*******************************************************************************
694 : * \brief Returns the MPI rank on which the given block should be stored.
695 : * \author Ole Schuett
696 : ******************************************************************************/
697 99289745 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
698 : const int col) {
699 99289745 : return dbm_distribution_stored_coords(matrix->dist, row, col);
700 : }
701 :
702 : /*******************************************************************************
703 : * \brief Returns the distribution of the given matrix.
704 : * \author Ole Schuett
705 : ******************************************************************************/
706 1419064 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
707 1419064 : return matrix->dist;
708 : }
709 :
710 : // EOF
|