LCOV - CP2K Regtests (git:3158929) - src/dbm/dbm

LCOV - code coverage report

Current view:	top level - src/dbm - dbm_matrix.c (source / functions)		Hit	Total	Coverage
Test:	CP2K Regtests (git:3158929)	Lines:	296	298	99.3 %
Date:	2025-07-22 22:41:15	Functions:	29	29	100.0 %

          Line data    Source code

       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <math.h>
      10             : #include <omp.h>
      11             : #include <stdbool.h>
      12             : #include <stddef.h>
      13             : #include <stdlib.h>
      14             : #include <string.h>
      15             : 
      16             : #include "dbm_hyperparams.h"
      17             : #include "dbm_matrix.h"
      18             : 
      19             : /*******************************************************************************
      20             :  * \brief Creates a new matrix.
      21             :  * \author Ole Schuett
      22             :  ******************************************************************************/
      23     1684494 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
      24             :                 const char name[], const int nrows, const int ncols,
      25             :                 const int row_sizes[nrows], const int col_sizes[ncols]) {
      26     1684494 :   assert(omp_get_num_threads() == 1);
      27             : 
      28     1684494 :   dbm_matrix_t *matrix = calloc(1, sizeof(dbm_matrix_t));
      29             : 
      30     1684494 :   assert(dist->rows.length == nrows);
      31     1684494 :   assert(dist->cols.length == ncols);
      32     1684494 :   dbm_distribution_hold(dist);
      33     1684494 :   matrix->dist = dist;
      34             : 
      35     1684494 :   size_t size = (strlen(name) + 1) * sizeof(char);
      36     1684494 :   matrix->name = malloc(size);
      37     1684494 :   assert(matrix->name != NULL && name != NULL);
      38     1684494 :   memcpy(matrix->name, name, size);
      39             : 
      40     1684494 :   matrix->nrows = nrows;
      41     1684494 :   matrix->ncols = ncols;
      42             : 
      43     1684494 :   size = nrows * sizeof(int);
      44     1684494 :   matrix->row_sizes = malloc(size);
      45     1684494 :   assert(matrix->row_sizes != NULL || size == 0);
      46     1684494 :   if (size != 0) {
      47     1683934 :     memcpy(matrix->row_sizes, row_sizes, size);
      48             :   }
      49             : 
      50     1684494 :   size = ncols * sizeof(int);
      51     1684494 :   matrix->col_sizes = malloc(size);
      52     1684494 :   assert(matrix->col_sizes != NULL || size == 0);
      53     1684494 :   if (size != 0) {
      54     1680330 :     memcpy(matrix->col_sizes, col_sizes, size);
      55             :   }
      56             : 
      57     1684494 :   const int num_shards = dbm_get_num_shards(matrix);
      58     1684494 :   matrix->shards = malloc(num_shards * sizeof(dbm_shard_t));
      59     1684494 :   assert(matrix->shards != NULL || num_shards == 0);
      60     1684494 : #pragma omp parallel for
      61             :   for (int ishard = 0; ishard < num_shards; ishard++) {
      62             :     dbm_shard_init(&matrix->shards[ishard]);
      63             :   }
      64             : 
      65     1684494 :   assert(*matrix_out == NULL);
      66     1684494 :   *matrix_out = matrix;
      67     1684494 : }
      68             : 
      69             : /*******************************************************************************
      70             :  * \brief Releases a matrix and all its ressources.
      71             :  * \author Ole Schuett
      72             :  ******************************************************************************/
      73     1684494 : void dbm_release(dbm_matrix_t *matrix) {
      74     1684494 :   assert(omp_get_num_threads() == 1);
      75     3368988 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
      76     1684494 :     dbm_shard_release(&matrix->shards[ishard]);
      77             :   }
      78     1684494 :   dbm_distribution_release(matrix->dist);
      79     1684494 :   free(matrix->row_sizes);
      80     1684494 :   free(matrix->col_sizes);
      81     1684494 :   free(matrix->shards);
      82     1684494 :   free(matrix->name);
      83     1684494 :   free(matrix);
      84     1684494 : }
      85             : 
      86             : /*******************************************************************************
      87             :  * \brief Copies content of matrix_b into matrix_a.
      88             :  *        Matrices must have the same row/col block sizes and distribution.
      89             :  * \author Ole Schuett
      90             :  ******************************************************************************/
      91      421322 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
      92      421322 :   assert(omp_get_num_threads() == 1);
      93             : 
      94      421322 :   assert(matrix_b->nrows == matrix_a->nrows);
      95     5275008 :   for (int i = 0; i < matrix_b->nrows; i++) {
      96     4853686 :     assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
      97             :   }
      98      421322 :   assert(matrix_b->ncols == matrix_a->ncols);
      99    27217767 :   for (int i = 0; i < matrix_b->ncols; i++) {
     100    26796445 :     assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
     101             :   }
     102             : 
     103      421322 :   assert(matrix_a->dist == matrix_b->dist);
     104             : 
     105      421322 : #pragma omp parallel for DBM_OMP_SCHEDULE
     106             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
     107             :     dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
     108             :   }
     109      421322 : }
     110             : 
     111             : /*******************************************************************************
     112             :  * \brief Copies content of matrix_b into matrix_a.
     113             :  *        Matrices may have different distributions.
     114             :  * \author Ole Schuett
     115             :  ******************************************************************************/
     116         144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
     117         144 :   assert(omp_get_num_threads() == 1);
     118         144 :   assert(matrix->nrows == redist->nrows);
     119        6384 :   for (int i = 0; i < matrix->nrows; i++) {
     120        6240 :     assert(matrix->row_sizes[i] == redist->row_sizes[i]);
     121             :   }
     122         144 :   assert(matrix->ncols == redist->ncols);
     123        6384 :   for (int i = 0; i < matrix->ncols; i++) {
     124        6240 :     assert(matrix->col_sizes[i] == redist->col_sizes[i]);
     125             :   }
     126             : 
     127         144 :   assert(dbm_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
     128         144 :   const dbm_mpi_comm_t comm = redist->dist->comm;
     129         144 :   const int nranks = dbm_mpi_comm_size(comm);
     130             : 
     131             :   // 1st pass: Compute send_count.
     132         144 :   int send_count[nranks];
     133         144 :   memset(send_count, 0, nranks * sizeof(int));
     134         288 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     135         144 :     dbm_shard_t *shard = &matrix->shards[ishard];
     136        8936 :     for (int iblock = 0; iblock < shard->nblocks; iblock++) {
     137        8792 :       const dbm_block_t *blk = &shard->blocks[iblock];
     138        8792 :       const int row_size = matrix->row_sizes[blk->row];
     139        8792 :       const int col_size = matrix->col_sizes[blk->col];
     140        8792 :       const int block_size = row_size * col_size;
     141        8792 :       const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
     142        8792 :       assert(0 <= rank && rank < nranks);
     143        8792 :       send_count[rank] += 2 + block_size;
     144             :     }
     145             :   }
     146             : 
     147             :   // 1st communication: Exchange counts.
     148         144 :   int recv_count[nranks];
     149         144 :   dbm_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
     150             : 
     151             :   // Compute displacements and allocate data buffers.
     152         144 :   int send_displ[nranks + 1], recv_displ[nranks + 1];
     153         144 :   send_displ[0] = recv_displ[0] = 0;
     154         432 :   for (int irank = 1; irank <= nranks; irank++) {
     155         288 :     send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
     156         288 :     recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
     157             :   }
     158         144 :   const int total_send_count = send_displ[nranks];
     159         144 :   const int total_recv_count = recv_displ[nranks];
     160         144 :   double *data_send = dbm_mpi_alloc_mem(total_send_count * sizeof(double));
     161         144 :   double *data_recv = dbm_mpi_alloc_mem(total_recv_count * sizeof(double));
     162             : 
     163             :   // 2nd pass: Fill send_data.
     164         144 :   int send_data_positions[nranks];
     165         144 :   memcpy(send_data_positions, send_displ, nranks * sizeof(int));
     166         288 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     167         144 :     dbm_shard_t *shard = &matrix->shards[ishard];
     168        8936 :     for (int iblock = 0; iblock < shard->nblocks; iblock++) {
     169        8792 :       const dbm_block_t *blk = &shard->blocks[iblock];
     170        8792 :       const double *blk_data = &shard->data[blk->offset];
     171        8792 :       const int row_size = matrix->row_sizes[blk->row];
     172        8792 :       const int col_size = matrix->col_sizes[blk->col];
     173        8792 :       const int block_size = row_size * col_size;
     174        8792 :       const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
     175        8792 :       const int pos = send_data_positions[rank];
     176        8792 :       data_send[pos + 0] = blk->row; // send integers as doubles
     177        8792 :       data_send[pos + 1] = blk->col;
     178        8792 :       memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
     179        8792 :       send_data_positions[rank] += 2 + block_size;
     180             :     }
     181             :   }
     182         432 :   for (int irank = 0; irank < nranks; irank++) {
     183         288 :     assert(send_data_positions[irank] == send_displ[irank + 1]);
     184             :   }
     185             : 
     186             :   // 2nd communication: Exchange data.
     187         144 :   dbm_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
     188             :                            recv_count, recv_displ, comm);
     189         144 :   dbm_mpi_free_mem(data_send);
     190             : 
     191             :   // 3rd pass: Unpack data.
     192         144 :   dbm_clear(redist);
     193         144 :   int recv_data_pos = 0;
     194        8936 :   while (recv_data_pos < total_recv_count) {
     195        8792 :     const int row = (int)data_recv[recv_data_pos + 0];
     196        8792 :     const int col = (int)data_recv[recv_data_pos + 1];
     197        8792 :     assert(data_recv[recv_data_pos + 0] - (double)row == 0.0);
     198        8792 :     assert(data_recv[recv_data_pos + 1] - (double)col == 0.0);
     199        8792 :     dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
     200        8792 :     const int row_size = matrix->row_sizes[row];
     201        8792 :     const int col_size = matrix->col_sizes[col];
     202        8792 :     const int block_size = row_size * col_size;
     203        8792 :     recv_data_pos += 2 + block_size;
     204             :   }
     205         144 :   assert(recv_data_pos == total_recv_count);
     206         144 :   dbm_mpi_free_mem(data_recv);
     207         144 : }
     208             : 
     209             : /*******************************************************************************
     210             :  * \brief Looks up a block from given matrics. This routine is thread-safe.
     211             :  *        If the block is not found then a null pointer is returned.
     212             :  * \author Ole Schuett
     213             :  ******************************************************************************/
     214    21370209 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
     215             :                      double **block, int *row_size, int *col_size) {
     216    21370209 :   assert(0 <= row && row < matrix->nrows);
     217    21370209 :   assert(0 <= col && col < matrix->ncols);
     218    21370209 :   assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
     219    21370209 :   *row_size = matrix->row_sizes[row];
     220    21370209 :   *col_size = matrix->col_sizes[col];
     221    21370209 :   *block = NULL;
     222             : 
     223    21370209 :   const int ishard = dbm_get_shard_index(matrix, row, col);
     224    21370209 :   const dbm_shard_t *shard = &matrix->shards[ishard];
     225    21370209 :   dbm_block_t *blk = dbm_shard_lookup(shard, row, col);
     226    21370209 :   if (blk != NULL) {
     227    19803145 :     *block = &shard->data[blk->offset];
     228             :   }
     229    21370209 : }
     230             : 
     231             : /*******************************************************************************
     232             :  * \brief Adds a block to given matrix. This routine is thread-safe.
     233             :  *        If block already exist then it gets overwritten (or summed).
     234             :  * \author Ole Schuett
     235             :  ******************************************************************************/
     236    34182882 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
     237             :                    const bool summation, const double *block) {
     238    34182882 :   assert(0 <= row && row < matrix->nrows);
     239    34182882 :   assert(0 <= col && col < matrix->ncols);
     240    34182882 :   assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
     241    34182882 :   const int row_size = matrix->row_sizes[row];
     242    34182882 :   const int col_size = matrix->col_sizes[col];
     243    34182882 :   const int block_size = row_size * col_size;
     244             : 
     245    34182882 :   const int ishard = dbm_get_shard_index(matrix, row, col);
     246    34182882 :   dbm_shard_t *shard = &matrix->shards[ishard];
     247    34182882 :   omp_set_lock(&shard->lock);
     248    34182882 :   dbm_block_t *blk =
     249    34182882 :       dbm_shard_get_or_allocate_block(shard, row, col, block_size);
     250    34182882 :   double *blk_data = &shard->data[blk->offset];
     251    34182882 :   if (summation) {
     252     3657777 :     assert(blk_data != NULL || 0 == block_size);
     253  1292510007 :     for (int i = 0; i < block_size; i++) {
     254  1288852230 :       blk_data[i] += block[i];
     255             :     }
     256    30525105 :   } else if (block_size != 0) {
     257    30520006 :     memcpy(blk_data, block, block_size * sizeof(double));
     258             :   }
     259    34182882 :   omp_unset_lock(&shard->lock);
     260    34182882 : }
     261             : 
     262             : /*******************************************************************************
     263             :  * \brief Remove all blocks from matrix, but does not release underlying memory.
     264             :  * \author Ole Schuett
     265             :  ******************************************************************************/
     266     2022940 : void dbm_clear(dbm_matrix_t *matrix) {
     267     2022940 :   assert(omp_get_num_threads() == 1);
     268             : 
     269     2022940 : #pragma omp parallel for DBM_OMP_SCHEDULE
     270             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     271             :     dbm_shard_t *shard = &matrix->shards[ishard];
     272             :     shard->nblocks = 0;
     273             :     shard->data_size = 0;
     274             :     shard->data_promised = 0;
     275             :     // Does not deallocate memory, hence data_allocated remains unchanged.
     276             :     memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
     277             :   }
     278     2022940 : }
     279             : 
     280             : /*******************************************************************************
     281             :  * \brief Removes all blocks from the matrix whose norm is below the threshold.
     282             :  *        Blocks of size zero are always kept.
     283             :  * \author Ole Schuett
     284             :  ******************************************************************************/
     285      602729 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
     286      602729 :   assert(omp_get_num_threads() == 1);
     287             : 
     288      602729 :   if (eps == 0.0) {
     289             :     return;
     290             :   }
     291      561951 :   const double eps2 = eps * eps;
     292             : 
     293      561951 : #pragma omp parallel for DBM_OMP_SCHEDULE
     294             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     295             :     dbm_shard_t *shard = &matrix->shards[ishard];
     296             :     const int old_nblocks = shard->nblocks;
     297             :     shard->nblocks = 0;
     298             :     shard->data_promised = 0;
     299             :     memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
     300             : 
     301             :     for (int iblock = 0; iblock < old_nblocks; iblock++) {
     302             :       const dbm_block_t old_blk = shard->blocks[iblock];
     303             :       const double *old_blk_data = &shard->data[old_blk.offset];
     304             :       const int row_size = matrix->row_sizes[old_blk.row];
     305             :       const int col_size = matrix->col_sizes[old_blk.col];
     306             :       const int block_size = row_size * col_size;
     307             :       double norm = 0.0;
     308             :       for (int i = 0; i < block_size; i++) {
     309             :         const double value = old_blk_data[i];
     310             :         norm += value * value;
     311             :         if (eps2 <= norm) {
     312             :           break;
     313             :         }
     314             :       }
     315             :       // For historic reasons zero-sized blocks are never filtered.
     316             :       if (block_size > 0 && norm < eps2) {
     317             :         continue; // filter the block
     318             :       }
     319             :       // Re-create block.
     320             :       dbm_block_t *new_blk = dbm_shard_promise_new_block(
     321             :           shard, old_blk.row, old_blk.col, block_size);
     322             :       assert(new_blk->offset <= old_blk.offset);
     323             :       if (new_blk->offset != old_blk.offset) {
     324             :         // Using memmove because it can handle overlapping buffers.
     325             :         double *new_blk_data = &shard->data[new_blk->offset];
     326             :         memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
     327             :       }
     328             :     }
     329             :     shard->data_size = shard->data_promised;
     330             :     // TODO: Could call realloc to release excess memory.
     331             :   }
     332             : }
     333             : 
     334             : /*******************************************************************************
     335             :  * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
     336             :  *        This routine must always be called within an OpenMP parallel region.
     337             :  * \author Ole Schuett
     338             :  ******************************************************************************/
     339     1428081 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
     340             :                         const int rows[], const int cols[]) {
     341     1428081 :   assert(omp_get_num_threads() == omp_get_max_threads() &&
     342             :          "Please call dbm_reserve_blocks within an OpenMP parallel region.");
     343     1428081 :   const int my_rank = matrix->dist->my_rank;
     344    30489283 :   for (int i = 0; i < nblocks; i++) {
     345    29061202 :     const int ishard = dbm_get_shard_index(matrix, rows[i], cols[i]);
     346    29061202 :     dbm_shard_t *shard = &matrix->shards[ishard];
     347    29061202 :     omp_set_lock(&shard->lock);
     348    29061202 :     assert(0 <= rows[i] && rows[i] < matrix->nrows);
     349    29061202 :     assert(0 <= cols[i] && cols[i] < matrix->ncols);
     350    29061202 :     assert(dbm_get_stored_coordinates(matrix, rows[i], cols[i]) == my_rank);
     351    29061202 :     const int row_size = matrix->row_sizes[rows[i]];
     352    29061202 :     const int col_size = matrix->col_sizes[cols[i]];
     353    29061202 :     const int block_size = row_size * col_size;
     354    29061202 :     dbm_shard_get_or_promise_block(shard, rows[i], cols[i], block_size);
     355    29061202 :     omp_unset_lock(&shard->lock);
     356             :   }
     357     1428081 : #pragma omp barrier
     358             : 
     359             : #pragma omp for DBM_OMP_SCHEDULE
     360     1428081 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     361     1428081 :     dbm_shard_t *shard = &matrix->shards[ishard];
     362     1428081 :     dbm_shard_allocate_promised_blocks(shard);
     363             :   }
     364     1428081 : }
     365             : 
     366             : /*******************************************************************************
     367             :  * \brief Multiplies all entries in the given matrix by the given factor alpha.
     368             :  * \author Ole Schuett
     369             :  ******************************************************************************/
     370      453308 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
     371      453308 :   assert(omp_get_num_threads() == 1);
     372      453308 :   if (alpha == 1.0) {
     373             :     return;
     374             :   }
     375      342903 :   if (alpha == 0.0) {
     376      332211 :     dbm_zero(matrix);
     377      332211 :     return;
     378             :   }
     379             : 
     380       10692 : #pragma omp parallel for DBM_OMP_SCHEDULE
     381             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     382             :     dbm_shard_t *shard = &matrix->shards[ishard];
     383             :     for (int i = 0; i < shard->data_size; i++) {
     384             :       shard->data[i] *= alpha;
     385             :     }
     386             :   }
     387             : }
     388             : 
     389             : /*******************************************************************************
     390             :  * \brief Sets all blocks in the given matrix to zero.
     391             :  * \author Ole Schuett
     392             :  ******************************************************************************/
     393      332211 : void dbm_zero(dbm_matrix_t *matrix) {
     394      332211 :   assert(omp_get_num_threads() == 1);
     395             : 
     396      332211 : #pragma omp parallel for DBM_OMP_SCHEDULE
     397             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     398             :     dbm_shard_t *shard = &matrix->shards[ishard];
     399             :     if (shard->data != NULL) {
     400             :       memset(shard->data, 0, shard->data_size * sizeof(double));
     401             :     }
     402             :   }
     403      332211 : }
     404             : 
     405             : /*******************************************************************************
     406             :  * \brief Adds matrix_b to matrix_a.
     407             :  * \author Ole Schuett
     408             :  ******************************************************************************/
     409      208290 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
     410      208290 :   assert(omp_get_num_threads() == 1);
     411      208290 :   assert(matrix_a->dist == matrix_b->dist);
     412             : 
     413      208290 : #pragma omp parallel for DBM_OMP_SCHEDULE
     414             :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
     415             :     dbm_shard_t *shard_a = &matrix_a->shards[ishard];
     416             :     const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
     417             :     for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
     418             :       const dbm_block_t blk_b = shard_b->blocks[iblock];
     419             : 
     420             :       const int row_size = matrix_b->row_sizes[blk_b.row];
     421             :       const int col_size = matrix_b->col_sizes[blk_b.col];
     422             :       assert(row_size == matrix_a->row_sizes[blk_b.row]);
     423             :       assert(col_size == matrix_a->col_sizes[blk_b.col]);
     424             :       const int block_size = row_size * col_size;
     425             :       dbm_block_t *blk_a = dbm_shard_get_or_allocate_block(
     426             :           shard_a, blk_b.row, blk_b.col, block_size);
     427             :       double *data_a = &shard_a->data[blk_a->offset];
     428             :       const double *data_b = &shard_b->data[blk_b.offset];
     429             :       for (int i = 0; i < block_size; i++) {
     430             :         data_a[i] += data_b[i];
     431             :       }
     432             :     }
     433             :   }
     434      208290 : }
     435             : 
     436             : /*******************************************************************************
     437             :  * \brief Creates an iterator for the blocks of the given matrix.
     438             :  *        The iteration order is not stable.
     439             :  *        This routine must always be called within an OpenMP parallel region.
     440             :  * \author Ole Schuett
     441             :  ******************************************************************************/
     442     3347846 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
     443     3347846 :   assert(omp_get_num_threads() == omp_get_max_threads() &&
     444             :          "Please call dbm_iterator_start within an OpenMP parallel region.");
     445     3347846 :   dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
     446     3347846 :   assert(iter != NULL);
     447     3347846 :   iter->matrix = matrix;
     448     3347846 :   iter->next_block = 0;
     449     3347846 :   iter->next_shard = omp_get_thread_num();
     450     3885561 :   while (iter->next_shard < dbm_get_num_shards(matrix) &&
     451     3347846 :          matrix->shards[iter->next_shard].nblocks == 0) {
     452      537715 :     iter->next_shard += omp_get_num_threads();
     453             :   }
     454     3347846 :   assert(*iter_out == NULL);
     455     3347846 :   *iter_out = iter;
     456     3347846 : }
     457             : 
     458             : /*******************************************************************************
     459             :  * \brief Returns number of blocks the iterator will provide to calling thread.
     460             :  * \author Ole Schuett
     461             :  ******************************************************************************/
     462      632927 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
     463      632927 :   int num_blocks = 0;
     464      632927 :   for (int ishard = omp_get_thread_num();
     465     1265854 :        ishard < dbm_get_num_shards(iter->matrix);
     466      632927 :        ishard += omp_get_num_threads()) {
     467      632927 :     num_blocks += iter->matrix->shards[ishard].nblocks;
     468             :   }
     469      632927 :   return num_blocks;
     470             : }
     471             : 
     472             : /*******************************************************************************
     473             :  * \brief Tests whether the given iterator has any block left.
     474             :  * \author Ole Schuett
     475             :  ******************************************************************************/
     476    59853509 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
     477    59853509 :   return iter->next_shard < dbm_get_num_shards(iter->matrix);
     478             : }
     479             : 
     480             : /*******************************************************************************
     481             :  * \brief Returns the next block from the given iterator.
     482             :  * \author Ole Schuett
     483             :  ******************************************************************************/
     484    66515068 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
     485             :                              double **block, int *row_size, int *col_size) {
     486    66515068 :   const dbm_matrix_t *matrix = iter->matrix;
     487    66515068 :   assert(iter->next_shard < dbm_get_num_shards(matrix));
     488    66515068 :   const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
     489    66515068 :   assert(iter->next_block < shard->nblocks);
     490    66515068 :   dbm_block_t *blk = &shard->blocks[iter->next_block];
     491             : 
     492    66515068 :   *row = blk->row;
     493    66515068 :   *col = blk->col;
     494    66515068 :   *row_size = matrix->row_sizes[blk->row];
     495    66515068 :   *col_size = matrix->col_sizes[blk->col];
     496    66515068 :   *block = &shard->data[blk->offset];
     497             : 
     498    66515068 :   iter->next_block++;
     499    66515068 :   if (iter->next_block >= shard->nblocks) {
     500             :     // Advance to the next non-empty shard...
     501     2810131 :     iter->next_shard += omp_get_num_threads();
     502     2810131 :     while (iter->next_shard < dbm_get_num_shards(matrix) &&
     503           0 :            matrix->shards[iter->next_shard].nblocks == 0) {
     504           0 :       iter->next_shard += omp_get_num_threads();
     505             :     }
     506     2810131 :     iter->next_block = 0; // ...and continue with its first block.
     507             :   }
     508    66515068 : }
     509             : 
     510             : /*******************************************************************************
     511             :  * \brief Releases the given iterator.
     512             :  * \author Ole Schuett
     513             :  ******************************************************************************/
     514     3347846 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
     515             : 
     516             : /*******************************************************************************
     517             :  * \brief Private routine to accumulate using Kahan's summation.
     518             :  * \author Hans Pabst
     519             :  ******************************************************************************/
     520    14383468 : static double kahan_sum(double value, double *accumulator,
     521             :                         double *compensation) {
     522    14383468 :   double r, c;
     523    14383468 :   assert(NULL != accumulator && NULL != compensation);
     524    14383468 :   c = value - *compensation;
     525    14383468 :   r = *accumulator + c;
     526    14383468 :   *compensation = (r - *accumulator) - c;
     527    14383468 :   *accumulator = r;
     528    14383468 :   return r;
     529             : }
     530             : 
     531             : /*******************************************************************************
     532             :  * \brief Computes a checksum of the given matrix.
     533             :  * \author Ole Schuett
     534             :  ******************************************************************************/
     535         190 : double dbm_checksum(const dbm_matrix_t *matrix) {
     536         190 :   double checksum = 0.0, compensation = 0.0;
     537         380 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     538         190 :     const dbm_shard_t *shard = &matrix->shards[ishard];
     539    14383658 :     for (int i = 0; i < shard->data_size; i++) {
     540    14383468 :       const double value = shard->data[i];
     541    14383468 :       kahan_sum(value * value, &checksum, &compensation);
     542             :     }
     543             :   }
     544         190 :   dbm_mpi_sum_double(&checksum, 1, matrix->dist->comm);
     545         190 :   return checksum;
     546             : }
     547             : 
     548             : /*******************************************************************************
     549             :  * \brief Returns the largest absolute value of all matrix elements.
     550             :  * \author Ole Schuett
     551             :  ******************************************************************************/
     552          48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
     553          48 :   double maxabs = 0.0;
     554          96 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     555          48 :     const dbm_shard_t *shard = &matrix->shards[ishard];
     556     1599050 :     for (int i = 0; i < shard->data_size; i++) {
     557     1599002 :       maxabs = fmax(maxabs, fabs(shard->data[i]));
     558             :     }
     559             :   }
     560          48 :   dbm_mpi_max_double(&maxabs, 1, matrix->dist->comm);
     561          48 :   return maxabs;
     562             : }
     563             : 
     564             : /*******************************************************************************
     565             :  * \brief Returns the name of the matrix of the given matrix.
     566             :  * \author Ole Schuett
     567             :  ******************************************************************************/
     568     1748211 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
     569             : 
     570             : /*******************************************************************************
     571             :  * \brief Returns the number of local Non-Zero Elements of the given matrix.
     572             :  * \author Ole Schuett
     573             :  ******************************************************************************/
     574     1856510 : int dbm_get_nze(const dbm_matrix_t *matrix) {
     575     1856510 :   int nze = 0;
     576     3713020 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     577     1856510 :     nze += matrix->shards[ishard].data_size;
     578             :   }
     579     1856510 :   return nze;
     580             : }
     581             : 
     582             : /*******************************************************************************
     583             :  * \brief Returns the number of local blocks of the given matrix.
     584             :  * \author Ole Schuett
     585             :  ******************************************************************************/
     586     1020888 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
     587     1020888 :   int nblocks = 0;
     588     2041776 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     589     1020888 :     nblocks += matrix->shards[ishard].nblocks;
     590             :   }
     591     1020888 :   return nblocks;
     592             : }
     593             : 
     594             : /*******************************************************************************
     595             :  * \brief Returns the row block sizes of the given matrix.
     596             :  * \author Ole Schuett
     597             :  ******************************************************************************/
     598     4762462 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
     599             :                        const int **row_sizes) {
     600     4762462 :   *nrows = matrix->nrows;
     601     4762462 :   *row_sizes = matrix->row_sizes;
     602     4762462 : }
     603             : 
     604             : /*******************************************************************************
     605             :  * \brief Returns the column block sizes of the given matrix.
     606             :  * \author Ole Schuett
     607             :  ******************************************************************************/
     608     3791884 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
     609             :                        const int **col_sizes) {
     610     3791884 :   *ncols = matrix->ncols;
     611     3791884 :   *col_sizes = matrix->col_sizes;
     612     3791884 : }
     613             : 
     614             : /*******************************************************************************
     615             :  * \brief Returns the local row block sizes of the given matrix.
     616             :  * \author Ole Schuett
     617             :  ******************************************************************************/
     618      264756 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
     619             :                         const int **local_rows) {
     620      264756 :   *nlocal_rows = matrix->dist->rows.nlocals;
     621      264756 :   *local_rows = matrix->dist->rows.local_indicies;
     622      264756 : }
     623             : 
     624             : /*******************************************************************************
     625             :  * \brief Returns the local column block sizes of the given matrix.
     626             :  * \author Ole Schuett
     627             :  ******************************************************************************/
     628      122256 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
     629             :                         const int **local_cols) {
     630      122256 :   *nlocal_cols = matrix->dist->cols.nlocals;
     631      122256 :   *local_cols = matrix->dist->cols.local_indicies;
     632      122256 : }
     633             : 
     634             : /*******************************************************************************
     635             :  * \brief Returns the MPI rank on which the given block should be stored.
     636             :  * \author Ole Schuett
     637             :  ******************************************************************************/
     638    92379655 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
     639             :                                const int col) {
     640    92379655 :   return dbm_distribution_stored_coords(matrix->dist, row, col);
     641             : }
     642             : 
     643             : /*******************************************************************************
     644             :  * \brief Returns the distribution of the given matrix.
     645             :  * \author Ole Schuett
     646             :  ******************************************************************************/
     647     1251167 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
     648     1251167 :   return matrix->dist;
     649             : }
     650             : 
     651             : // EOF

Generated by: LCOV version 1.15