Line data Source code
1 : /*----------------------------------------------------------------------------*/
2 : /* CP2K: A general program to perform molecular dynamics simulations */
3 : /* Copyright 2000-2026 CP2K developers group <https://cp2k.org> */
4 : /* */
5 : /* SPDX-License-Identifier: BSD-3-Clause */
6 : /*----------------------------------------------------------------------------*/
7 : #include "dbm_matrix.h"
8 : #include "dbm_hyperparams.h"
9 :
10 : #include <assert.h>
11 : #include <math.h>
12 : #include <omp.h>
13 : #include <stdbool.h>
14 : #include <stddef.h>
15 : #include <stdlib.h>
16 : #include <string.h>
17 :
18 : /*******************************************************************************
19 : * \brief Creates a new matrix.
20 : * \author Ole Schuett
21 : ******************************************************************************/
22 1956802 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
23 : const char name[], const int nrows, const int ncols,
24 : const int row_sizes[nrows], const int col_sizes[ncols]) {
25 1956802 : assert(omp_get_num_threads() == 1);
26 :
27 1956802 : dbm_matrix_t *const matrix = calloc(1, sizeof(dbm_matrix_t));
28 :
29 1956802 : assert(dist->rows.length == nrows);
30 1956802 : assert(dist->cols.length == ncols);
31 1956802 : dbm_distribution_hold(dist);
32 1956802 : matrix->dist = dist;
33 :
34 1956802 : size_t size = (strlen(name) + 1) * sizeof(char);
35 1956802 : matrix->name = malloc(size);
36 1956802 : assert(matrix->name != NULL && name != NULL);
37 1956802 : memcpy(matrix->name, name, size);
38 :
39 1956802 : matrix->nrows = nrows;
40 1956802 : matrix->ncols = ncols;
41 :
42 1956802 : size = nrows * sizeof(int);
43 1956802 : matrix->row_sizes = malloc(size);
44 1956802 : assert(matrix->row_sizes != NULL || size == 0);
45 1956802 : if (size != 0) {
46 1956242 : memcpy(matrix->row_sizes, row_sizes, size);
47 : }
48 :
49 1956802 : size = ncols * sizeof(int);
50 1956802 : matrix->col_sizes = malloc(size);
51 1956802 : assert(matrix->col_sizes != NULL || size == 0);
52 1956802 : if (size != 0) {
53 1952638 : memcpy(matrix->col_sizes, col_sizes, size);
54 : }
55 :
56 1956802 : const int num_shards = dbm_get_num_shards(matrix);
57 1956802 : matrix->shards = malloc(num_shards * sizeof(dbm_shard_t));
58 1956802 : assert(matrix->shards != NULL || num_shards == 0);
59 1956802 : #pragma omp parallel for
60 : for (int ishard = 0; ishard < num_shards; ishard++) {
61 : dbm_shard_init(&matrix->shards[ishard]);
62 : }
63 :
64 1956802 : assert(*matrix_out == NULL);
65 1956802 : *matrix_out = matrix;
66 1956802 : }
67 :
68 : /*******************************************************************************
69 : * \brief Releases a matrix and all its ressources.
70 : * \author Ole Schuett
71 : ******************************************************************************/
72 1956802 : void dbm_release(dbm_matrix_t *matrix) {
73 1956802 : assert(omp_get_num_threads() == 1);
74 3913604 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
75 1956802 : dbm_shard_release(&matrix->shards[ishard]);
76 : }
77 1956802 : dbm_distribution_release(matrix->dist);
78 1956802 : free(matrix->row_sizes);
79 1956802 : free(matrix->col_sizes);
80 1956802 : free(matrix->shards);
81 1956802 : free(matrix->name);
82 1956802 : free(matrix);
83 1956802 : }
84 :
85 : /*******************************************************************************
86 : * \brief Copies content of matrix_b into matrix_a.
87 : * Matrices must have the same row/col block sizes and distribution.
88 : * \author Ole Schuett
89 : ******************************************************************************/
90 488798 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
91 488798 : assert(omp_get_num_threads() == 1);
92 :
93 488798 : assert(matrix_b->nrows == matrix_a->nrows);
94 5805552 : for (int i = 0; i < matrix_b->nrows; i++) {
95 5316754 : assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
96 : }
97 488798 : assert(matrix_b->ncols == matrix_a->ncols);
98 29929461 : for (int i = 0; i < matrix_b->ncols; i++) {
99 29440663 : assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
100 : }
101 :
102 488798 : assert(matrix_a->dist == matrix_b->dist);
103 :
104 488798 : #pragma omp parallel for DBM_OMP_SCHEDULE
105 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
106 : dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
107 : }
108 488798 : }
109 :
110 : /*******************************************************************************
111 : * \brief Copies content of matrix_b into matrix_a.
112 : * Matrices may have different distributions.
113 : * \author Ole Schuett
114 : ******************************************************************************/
115 144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
116 144 : assert(omp_get_num_threads() == 1);
117 144 : assert(matrix->nrows == redist->nrows);
118 6384 : for (int i = 0; i < matrix->nrows; i++) {
119 6240 : assert(matrix->row_sizes[i] == redist->row_sizes[i]);
120 : }
121 144 : assert(matrix->ncols == redist->ncols);
122 6384 : for (int i = 0; i < matrix->ncols; i++) {
123 6240 : assert(matrix->col_sizes[i] == redist->col_sizes[i]);
124 : }
125 :
126 144 : assert(cp_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
127 144 : const cp_mpi_comm_t comm = redist->dist->comm;
128 144 : const int nranks = cp_mpi_comm_size(comm);
129 :
130 : // 1st pass: Compute send_count.
131 144 : int send_count[nranks];
132 144 : memset(send_count, 0, nranks * sizeof(int));
133 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
134 144 : const dbm_shard_t *const shard = &matrix->shards[ishard];
135 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
136 8792 : const dbm_block_t *const blk = &shard->blocks[iblock];
137 8792 : const int row_size = matrix->row_sizes[blk->row];
138 8792 : const int col_size = matrix->col_sizes[blk->col];
139 8792 : const int block_size = row_size * col_size;
140 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
141 8792 : assert(0 <= rank && rank < nranks);
142 8792 : send_count[rank] += 2 + block_size;
143 : }
144 : }
145 :
146 : // 1st communication: Exchange counts.
147 144 : int recv_count[nranks];
148 144 : cp_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
149 :
150 : // Compute displacements and allocate data buffers.
151 144 : int send_displ[nranks + 1], recv_displ[nranks + 1];
152 144 : send_displ[0] = recv_displ[0] = 0;
153 432 : for (int irank = 1; irank <= nranks; irank++) {
154 288 : send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
155 288 : recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
156 : }
157 144 : const int total_send_count = send_displ[nranks];
158 144 : const int total_recv_count = recv_displ[nranks];
159 144 : double *const data_send = cp_mpi_alloc_mem(total_send_count * sizeof(double));
160 144 : double *const data_recv = cp_mpi_alloc_mem(total_recv_count * sizeof(double));
161 :
162 : // 2nd pass: Fill send_data.
163 144 : int send_data_positions[nranks];
164 144 : memcpy(send_data_positions, send_displ, nranks * sizeof(int));
165 288 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
166 144 : const dbm_shard_t *const shard = &matrix->shards[ishard];
167 8936 : for (int iblock = 0; iblock < shard->nblocks; iblock++) {
168 8792 : const dbm_block_t *const blk = &shard->blocks[iblock];
169 8792 : const double *const blk_data = &shard->data[blk->offset];
170 8792 : const int row_size = matrix->row_sizes[blk->row];
171 8792 : const int col_size = matrix->col_sizes[blk->col];
172 8792 : const int block_size = row_size * col_size;
173 8792 : const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
174 8792 : const int pos = send_data_positions[rank];
175 8792 : data_send[pos + 0] = blk->row; // send integers as doubles
176 8792 : data_send[pos + 1] = blk->col;
177 8792 : memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
178 8792 : send_data_positions[rank] += 2 + block_size;
179 : }
180 : }
181 432 : for (int irank = 0; irank < nranks; irank++) {
182 288 : assert(send_data_positions[irank] == send_displ[irank + 1]);
183 : }
184 :
185 : // 2nd communication: Exchange data.
186 144 : cp_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
187 : recv_count, recv_displ, comm);
188 144 : cp_mpi_free_mem(data_send);
189 :
190 : // 3rd pass: Unpack data.
191 144 : dbm_clear(redist);
192 144 : int recv_data_pos = 0;
193 8936 : while (recv_data_pos < total_recv_count) {
194 8792 : const int row = (int)data_recv[recv_data_pos + 0];
195 8792 : const int col = (int)data_recv[recv_data_pos + 1];
196 8792 : assert(data_recv[recv_data_pos + 0] == (double)row);
197 8792 : assert(data_recv[recv_data_pos + 1] == (double)col);
198 8792 : dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
199 8792 : const int row_size = matrix->row_sizes[row];
200 8792 : const int col_size = matrix->col_sizes[col];
201 8792 : const int block_size = row_size * col_size;
202 8792 : recv_data_pos += 2 + block_size;
203 : }
204 144 : assert(recv_data_pos == total_recv_count);
205 144 : cp_mpi_free_mem(data_recv);
206 144 : }
207 :
208 : /*******************************************************************************
209 : * \brief Looks up a block from given matrics. This routine is thread-safe.
210 : * If the block is not found then a null pointer is returned.
211 : * \author Ole Schuett
212 : ******************************************************************************/
213 23120926 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
214 : double **block, int *row_size, int *col_size) {
215 23120926 : assert(0 <= row && row < matrix->nrows);
216 23120926 : assert(0 <= col && col < matrix->ncols);
217 23120926 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
218 23120926 : *row_size = matrix->row_sizes[row];
219 23120926 : *col_size = matrix->col_sizes[col];
220 23120926 : *block = NULL;
221 :
222 23120926 : const int ishard = dbm_get_shard_index(matrix, row, col);
223 23120926 : const dbm_shard_t *const shard = &matrix->shards[ishard];
224 23120926 : const dbm_block_t *const blk = dbm_shard_lookup(shard, row, col);
225 23120926 : if (blk != NULL) {
226 21486987 : *block = &shard->data[blk->offset];
227 : }
228 23120926 : }
229 :
230 : /*******************************************************************************
231 : * \brief Adds a block to given matrix. This routine is thread-safe.
232 : * If block already exist then it gets overwritten (or summed).
233 : * \author Ole Schuett
234 : ******************************************************************************/
235 36859635 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
236 : const bool summation, const double *block) {
237 36859635 : assert(0 <= row && row < matrix->nrows);
238 36859635 : assert(0 <= col && col < matrix->ncols);
239 36859635 : assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
240 36859635 : const int row_size = matrix->row_sizes[row];
241 36859635 : const int col_size = matrix->col_sizes[col];
242 36859635 : const int block_size = row_size * col_size;
243 :
244 36859635 : const int ishard = dbm_get_shard_index(matrix, row, col);
245 36859635 : dbm_shard_t *const shard = &matrix->shards[ishard];
246 36859635 : omp_set_lock(&shard->lock);
247 36859635 : const dbm_block_t *const blk =
248 36859635 : dbm_shard_get_or_allocate_block(shard, row, col, block_size);
249 36859635 : double *const blk_data = &shard->data[blk->offset];
250 36859635 : if (summation) {
251 3803590 : assert(blk_data != NULL || 0 == block_size);
252 1307440814 : for (int i = 0; i < block_size; i++) {
253 1303637224 : blk_data[i] += block[i];
254 : }
255 33056045 : } else if (block_size != 0) {
256 33050946 : memcpy(blk_data, block, block_size * sizeof(double));
257 : }
258 36859635 : omp_unset_lock(&shard->lock);
259 36859635 : }
260 :
261 : /*******************************************************************************
262 : * \brief Remove all blocks from matrix, but does not release underlying memory.
263 : * \author Ole Schuett
264 : ******************************************************************************/
265 2327334 : void dbm_clear(dbm_matrix_t *matrix) {
266 2327334 : assert(omp_get_num_threads() == 1);
267 :
268 2327334 : #pragma omp parallel for DBM_OMP_SCHEDULE
269 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
270 : dbm_shard_t *const shard = &matrix->shards[ishard];
271 : shard->data_promised = shard->data_size = shard->nblocks = 0;
272 : // Does not deallocate memory, hence data_allocated remains unchanged.
273 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
274 : }
275 2327334 : }
276 :
277 : /*******************************************************************************
278 : * \brief Removes all blocks from the matrix whose norm is below the threshold.
279 : * Blocks of size zero are always kept.
280 : * \author Ole Schuett
281 : ******************************************************************************/
282 691376 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
283 691376 : assert(omp_get_num_threads() == 1);
284 :
285 691376 : if (eps == 0.0) {
286 : return;
287 : }
288 639622 : const double eps2 = eps * eps;
289 :
290 639622 : #pragma omp parallel for DBM_OMP_SCHEDULE
291 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
292 : dbm_shard_t *const shard = &matrix->shards[ishard];
293 : const int old_nblocks = shard->nblocks;
294 : shard->nblocks = shard->data_promised = 0;
295 : memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
296 :
297 : for (int iblock = 0; iblock < old_nblocks; iblock++) {
298 : const dbm_block_t old_blk = shard->blocks[iblock];
299 : const double *old_blk_data = &shard->data[old_blk.offset];
300 : const int row_size = matrix->row_sizes[old_blk.row];
301 : const int col_size = matrix->col_sizes[old_blk.col];
302 : const int block_size = row_size * col_size;
303 : double norm = 0.0;
304 : for (int i = 0; i < block_size; i++) {
305 : const double value = old_blk_data[i];
306 : norm += value * value;
307 : if (eps2 <= norm) {
308 : break;
309 : }
310 : }
311 : // For historic reasons zero-sized blocks are never filtered.
312 : if (block_size > 0 && norm < eps2) {
313 : continue; // filter the block
314 : }
315 : // Re-create block.
316 : dbm_block_t *const new_blk = dbm_shard_promise_new_block(
317 : shard, old_blk.row, old_blk.col, block_size);
318 : assert(new_blk->offset <= old_blk.offset);
319 : if (new_blk->offset != old_blk.offset) {
320 : // Using memmove because it can handle overlapping buffers.
321 : double *new_blk_data = &shard->data[new_blk->offset];
322 : memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
323 : }
324 : }
325 : shard->data_size = shard->data_promised;
326 : // TODO: Could call realloc to release excess memory.
327 : }
328 : }
329 :
330 : /*******************************************************************************
331 : * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
332 : * This routine must always be called within an OpenMP parallel region.
333 : * \author Ole Schuett
334 : ******************************************************************************/
335 1627955 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
336 : const int rows[], const int cols[]) {
337 1627955 : assert(omp_get_num_threads() == omp_get_max_threads() &&
338 : "Please call dbm_reserve_blocks within an OpenMP parallel region.");
339 1627955 : const int my_rank = matrix->dist->my_rank;
340 :
341 32722937 : for (int i = 0; i < nblocks; i++) {
342 31094982 : const int row = rows[i], col = cols[i];
343 31094982 : assert(0 <= row && row < matrix->nrows);
344 31094982 : assert(0 <= col && col < matrix->ncols);
345 31094982 : assert(dbm_get_stored_coordinates(matrix, row, col) == my_rank);
346 31094982 : const int ishard = dbm_get_shard_index(matrix, row, col);
347 31094982 : dbm_shard_t *const shard = &matrix->shards[ishard];
348 31094982 : const int row_size = matrix->row_sizes[row];
349 31094982 : const int col_size = matrix->col_sizes[col];
350 31094982 : const int block_size = row_size * col_size;
351 31094982 : omp_set_lock(&shard->lock);
352 31094982 : dbm_shard_get_or_promise_block(shard, row, col, block_size);
353 31094982 : omp_unset_lock(&shard->lock);
354 : }
355 1627955 : #pragma omp barrier
356 :
357 : #pragma omp for DBM_OMP_SCHEDULE
358 1627955 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
359 1627955 : dbm_shard_t *const shard = &matrix->shards[ishard];
360 1627955 : dbm_shard_allocate_promised_blocks(shard);
361 : }
362 1627955 : }
363 :
364 : /*******************************************************************************
365 : * \brief Multiplies all entries in the given matrix by the given factor alpha.
366 : * \author Ole Schuett
367 : ******************************************************************************/
368 531705 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
369 531705 : assert(omp_get_num_threads() == 1);
370 531705 : if (alpha == 1.0) {
371 : return;
372 : }
373 406933 : if (alpha == 0.0) {
374 395085 : dbm_zero(matrix);
375 395085 : return;
376 : }
377 :
378 11848 : #pragma omp parallel for DBM_OMP_SCHEDULE
379 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
380 : dbm_shard_t *const shard = &matrix->shards[ishard];
381 : for (int i = 0; i < shard->data_size; i++) {
382 : shard->data[i] *= alpha;
383 : }
384 : }
385 : }
386 :
387 : /*******************************************************************************
388 : * \brief Sets all blocks in the given matrix to zero.
389 : * \author Ole Schuett
390 : ******************************************************************************/
391 395085 : void dbm_zero(dbm_matrix_t *matrix) {
392 395085 : assert(omp_get_num_threads() == 1);
393 :
394 395085 : #pragma omp parallel for DBM_OMP_SCHEDULE
395 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
396 : dbm_shard_t *const shard = &matrix->shards[ishard];
397 : if (shard->data != NULL) {
398 : memset(shard->data, 0, shard->data_size * sizeof(double));
399 : }
400 : }
401 395085 : }
402 :
403 : /*******************************************************************************
404 : * \brief Adds matrix_b to matrix_a.
405 : * \author Ole Schuett
406 : ******************************************************************************/
407 242028 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
408 242028 : assert(omp_get_num_threads() == 1);
409 242028 : assert(matrix_a->dist == matrix_b->dist);
410 :
411 242028 : #pragma omp parallel for DBM_OMP_SCHEDULE
412 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
413 : dbm_shard_t *const shard_a = &matrix_a->shards[ishard];
414 : const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
415 : for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
416 : const dbm_block_t blk_b = shard_b->blocks[iblock];
417 : const int row_size = matrix_b->row_sizes[blk_b.row];
418 : const int col_size = matrix_b->col_sizes[blk_b.col];
419 : assert(row_size == matrix_a->row_sizes[blk_b.row]);
420 : assert(col_size == matrix_a->col_sizes[blk_b.col]);
421 : const int block_size = row_size * col_size;
422 : const dbm_block_t *const blk_a = dbm_shard_get_or_allocate_block(
423 : shard_a, blk_b.row, blk_b.col, block_size);
424 : double *const data_a = &shard_a->data[blk_a->offset];
425 : const double *const data_b = &shard_b->data[blk_b.offset];
426 : for (int i = 0; i < block_size; i++) {
427 : data_a[i] += data_b[i];
428 : }
429 : }
430 : }
431 242028 : }
432 :
433 : /*******************************************************************************
434 : * \brief Creates an iterator for the blocks of the given matrix.
435 : * The iteration order is not stable.
436 : * This routine must always be called within an OpenMP parallel region.
437 : * \author Ole Schuett
438 : ******************************************************************************/
439 3863791 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
440 3863791 : assert(omp_get_num_threads() == omp_get_max_threads() &&
441 : "Please call dbm_iterator_start within an OpenMP parallel region.");
442 3863791 : dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
443 3863791 : assert(iter != NULL);
444 3863791 : iter->matrix = matrix;
445 3863791 : iter->next_block = 0;
446 3863791 : iter->next_shard = omp_get_thread_num();
447 4494322 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
448 3863791 : matrix->shards[iter->next_shard].nblocks == 0) {
449 630531 : iter->next_shard += omp_get_num_threads();
450 : }
451 3863791 : assert(*iter_out == NULL);
452 3863791 : *iter_out = iter;
453 3863791 : }
454 :
455 : /*******************************************************************************
456 : * \brief Returns number of blocks the iterator will provide to calling thread.
457 : * \author Ole Schuett
458 : ******************************************************************************/
459 715488 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
460 715488 : int num_blocks = 0;
461 715488 : for (int ishard = omp_get_thread_num();
462 1430976 : ishard < dbm_get_num_shards(iter->matrix);
463 715488 : ishard += omp_get_num_threads()) {
464 715488 : num_blocks += iter->matrix->shards[ishard].nblocks;
465 : }
466 715488 : return num_blocks;
467 : }
468 :
469 : /*******************************************************************************
470 : * \brief Tests whether the given iterator has any block left.
471 : * \author Ole Schuett
472 : ******************************************************************************/
473 64916758 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
474 64916758 : return iter->next_shard < dbm_get_num_shards(iter->matrix);
475 : }
476 :
477 : /*******************************************************************************
478 : * \brief Returns the next block from the given iterator.
479 : * \author Ole Schuett
480 : ******************************************************************************/
481 72053705 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
482 : double **block, int *row_size, int *col_size) {
483 72053705 : const dbm_matrix_t *matrix = iter->matrix;
484 72053705 : assert(iter->next_shard < dbm_get_num_shards(matrix));
485 72053705 : const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
486 72053705 : assert(iter->next_block < shard->nblocks);
487 72053705 : dbm_block_t *blk = &shard->blocks[iter->next_block];
488 :
489 72053705 : *row = blk->row;
490 72053705 : *col = blk->col;
491 72053705 : *row_size = matrix->row_sizes[blk->row];
492 72053705 : *col_size = matrix->col_sizes[blk->col];
493 72053705 : *block = &shard->data[blk->offset];
494 :
495 72053705 : iter->next_block++;
496 72053705 : if (iter->next_block >= shard->nblocks) {
497 : // Advance to the next non-empty shard...
498 3233260 : iter->next_shard += omp_get_num_threads();
499 3233260 : while (iter->next_shard < dbm_get_num_shards(matrix) &&
500 0 : matrix->shards[iter->next_shard].nblocks == 0) {
501 0 : iter->next_shard += omp_get_num_threads();
502 : }
503 3233260 : iter->next_block = 0; // ...and continue with its first block.
504 : }
505 72053705 : }
506 :
507 : /*******************************************************************************
508 : * \brief Releases the given iterator.
509 : * \author Ole Schuett
510 : ******************************************************************************/
511 3863791 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
512 :
513 : /*******************************************************************************
514 : * \brief Private routine to accumulate using Kahan's summation.
515 : * \author Hans Pabst
516 : ******************************************************************************/
517 14383468 : static double kahan_sum(double value, double *accumulator,
518 : double *compensation) {
519 14383468 : double r, c;
520 14383468 : assert(NULL != accumulator && NULL != compensation);
521 14383468 : c = value - *compensation;
522 14383468 : r = *accumulator + c;
523 14383468 : *compensation = (r - *accumulator) - c;
524 14383468 : *accumulator = r;
525 14383468 : return r;
526 : }
527 :
528 : /*******************************************************************************
529 : * \brief Computes a checksum of the given matrix.
530 : * \author Ole Schuett
531 : ******************************************************************************/
532 190 : double dbm_checksum(const dbm_matrix_t *matrix) {
533 190 : double checksum = 0.0, compensation = 0.0;
534 380 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
535 190 : const dbm_shard_t *shard = &matrix->shards[ishard];
536 14383658 : for (int i = 0; i < shard->data_size; i++) {
537 14383468 : const double value = shard->data[i];
538 14383468 : kahan_sum(value * value, &checksum, &compensation);
539 : }
540 : }
541 190 : cp_mpi_sum_double(&checksum, 1, matrix->dist->comm);
542 190 : return checksum;
543 : }
544 :
545 : /*******************************************************************************
546 : * \brief Returns the largest absolute value of all matrix elements.
547 : * \author Ole Schuett
548 : ******************************************************************************/
549 48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
550 48 : double maxabs = 0.0;
551 96 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
552 48 : const dbm_shard_t *shard = &matrix->shards[ishard];
553 1599050 : for (int i = 0; i < shard->data_size; i++) {
554 1599002 : maxabs = fmax(maxabs, fabs(shard->data[i]));
555 : }
556 : }
557 48 : cp_mpi_max_double(&maxabs, 1, matrix->dist->comm);
558 48 : return maxabs;
559 : }
560 :
561 : /*******************************************************************************
562 : * \brief Calculates maximum relative difference between matrix_a and matrix_b.
563 : * \author Hans Pabst
564 : ******************************************************************************/
565 0 : double dbm_maxeps(const dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
566 0 : const int num_shards = dbm_get_num_shards(matrix_a);
567 0 : double epsilon = 0;
568 :
569 0 : assert(omp_get_num_threads() == 1);
570 0 : assert(matrix_a->dist == matrix_b->dist);
571 0 : assert(num_shards == dbm_get_num_shards(matrix_b));
572 :
573 0 : #pragma omp parallel for reduction(max : epsilon) DBM_OMP_SCHEDULE
574 : for (int ishard = 0; ishard < num_shards; ++ishard) {
575 : const dbm_shard_t *const shard_a = &matrix_a->shards[ishard];
576 : const dbm_shard_t *const shard_b = &matrix_b->shards[ishard];
577 : for (int iblock = 0; iblock < shard_a->nblocks; ++iblock) {
578 : const dbm_block_t *const blk_a = &shard_a->blocks[iblock];
579 : const dbm_block_t *const blk_b =
580 : dbm_shard_lookup(shard_b, blk_a->row, blk_a->col);
581 : const int row_size = matrix_a->row_sizes[blk_a->row];
582 : const int col_size = matrix_a->col_sizes[blk_a->col];
583 : assert(NULL == blk_b || row_size == matrix_b->row_sizes[blk_b->row]);
584 : assert(NULL == blk_b || col_size == matrix_b->col_sizes[blk_b->col]);
585 : const double *const data_a = &shard_a->data[blk_a->offset];
586 : const double *const data_b =
587 : (NULL == blk_b) ? NULL : &shard_b->data[blk_b->offset];
588 : const int block_size = row_size * col_size;
589 : for (int i = 0; i < block_size; ++i) {
590 : const double d = data_a[i] - (NULL == data_b ? 0.0 : data_b[i]);
591 : const double e = fabs(0 != data_a[i] ? (d / data_a[i]) : d);
592 : if (epsilon < e) {
593 : epsilon = e;
594 : }
595 : }
596 : }
597 : for (int iblock = 0; iblock < shard_b->nblocks; ++iblock) {
598 : const dbm_block_t *const blk_b = &shard_b->blocks[iblock];
599 : if (NULL != dbm_shard_lookup(shard_a, blk_b->row, blk_b->col)) {
600 : continue;
601 : }
602 : const int row_size = matrix_b->row_sizes[blk_b->row];
603 : const int col_size = matrix_b->col_sizes[blk_b->col];
604 : const double *const data_b = &shard_b->data[blk_b->offset];
605 : const int block_size = row_size * col_size;
606 : for (int i = 0; i < block_size; ++i) {
607 : const double e = fabs(data_b[i]);
608 : if (epsilon < e) {
609 : epsilon = e;
610 : }
611 : }
612 : }
613 : }
614 :
615 0 : cp_mpi_max_double(&epsilon, 1, matrix_a->dist->comm);
616 0 : return epsilon;
617 : }
618 :
619 : /*******************************************************************************
620 : * \brief Returns the name of the matrix of the given matrix.
621 : * \author Ole Schuett
622 : ******************************************************************************/
623 2011693 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
624 :
625 : /*******************************************************************************
626 : * \brief Returns the number of local Non-Zero Elements of the given matrix.
627 : * \author Ole Schuett
628 : ******************************************************************************/
629 2051191 : int dbm_get_nze(const dbm_matrix_t *matrix) {
630 2051191 : int nze = 0;
631 4102382 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
632 2051191 : nze += matrix->shards[ishard].data_size;
633 : }
634 2051191 : return nze;
635 : }
636 :
637 : /*******************************************************************************
638 : * \brief Returns the number of local blocks of the given matrix.
639 : * \author Ole Schuett
640 : ******************************************************************************/
641 1074092 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
642 1074092 : int nblocks = 0;
643 2148184 : for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
644 1074092 : nblocks += matrix->shards[ishard].nblocks;
645 : }
646 1074092 : return nblocks;
647 : }
648 :
649 : /*******************************************************************************
650 : * \brief Returns the row block sizes of the given matrix.
651 : * \author Ole Schuett
652 : ******************************************************************************/
653 4381334 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
654 : const int **row_sizes) {
655 4381334 : *nrows = matrix->nrows;
656 4381334 : *row_sizes = matrix->row_sizes;
657 4381334 : }
658 :
659 : /*******************************************************************************
660 : * \brief Returns the column block sizes of the given matrix.
661 : * \author Ole Schuett
662 : ******************************************************************************/
663 2914424 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
664 : const int **col_sizes) {
665 2914424 : *ncols = matrix->ncols;
666 2914424 : *col_sizes = matrix->col_sizes;
667 2914424 : }
668 :
669 : /*******************************************************************************
670 : * \brief Returns the local row block sizes of the given matrix.
671 : * \author Ole Schuett
672 : ******************************************************************************/
673 308628 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
674 : const int **local_rows) {
675 308628 : *nlocal_rows = matrix->dist->rows.nlocals;
676 308628 : *local_rows = matrix->dist->rows.local_indicies;
677 308628 : }
678 :
679 : /*******************************************************************************
680 : * \brief Returns the local column block sizes of the given matrix.
681 : * \author Ole Schuett
682 : ******************************************************************************/
683 138352 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
684 : const int **local_cols) {
685 138352 : *nlocal_cols = matrix->dist->cols.nlocals;
686 138352 : *local_cols = matrix->dist->cols.local_indicies;
687 138352 : }
688 :
689 : /*******************************************************************************
690 : * \brief Returns the MPI rank on which the given block should be stored.
691 : * \author Ole Schuett
692 : ******************************************************************************/
693 99246582 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
694 : const int col) {
695 99246582 : return dbm_distribution_stored_coords(matrix->dist, row, col);
696 : }
697 :
698 : /*******************************************************************************
699 : * \brief Returns the distribution of the given matrix.
700 : * \author Ole Schuett
701 : ******************************************************************************/
702 1415272 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
703 1415272 : return matrix->dist;
704 : }
705 :
706 : // EOF
|