LCOV - code coverage report
Current view: top level - src/dbm - dbm_matrix.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:c24029e) Lines: 96.4 % 308 297
Test Date: 2026-07-04 06:36:57 Functions: 96.7 % 30 29

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : #include "dbm_matrix.h"
       8              : #include "dbm_hyperparams.h"
       9              : 
      10              : #include <assert.h>
      11              : #include <math.h>
      12              : #include <omp.h>
      13              : #include <stdbool.h>
      14              : #include <stddef.h>
      15              : #include <stdlib.h>
      16              : #include <string.h>
      17              : 
      18              : /*******************************************************************************
      19              :  * \brief Creates a new matrix.
      20              :  * \author Ole Schuett
      21              :  ******************************************************************************/
      22      1959766 : void dbm_create(dbm_matrix_t **matrix_out, dbm_distribution_t *dist,
      23              :                 const char name[], const int nrows, const int ncols,
      24              :                 const int row_sizes[nrows], const int col_sizes[ncols]) {
      25      1959766 :   assert(omp_get_num_threads() == 1);
      26              : 
      27      1959766 :   dbm_matrix_t *const matrix = calloc(1, sizeof(dbm_matrix_t));
      28              : 
      29      1959766 :   assert(dist->rows.length == nrows);
      30      1959766 :   assert(dist->cols.length == ncols);
      31      1959766 :   dbm_distribution_hold(dist);
      32      1959766 :   matrix->dist = dist;
      33              : 
      34      1959766 :   size_t size = (strlen(name) + 1) * sizeof(char);
      35      1959766 :   matrix->name = malloc(size);
      36      1959766 :   assert(matrix->name != NULL && name != NULL);
      37      1959766 :   memcpy(matrix->name, name, size);
      38              : 
      39      1959766 :   matrix->nrows = nrows;
      40      1959766 :   matrix->ncols = ncols;
      41              : 
      42      1959766 :   size = nrows * sizeof(int);
      43      1959766 :   matrix->row_sizes = malloc(size);
      44      1959766 :   assert(matrix->row_sizes != NULL || size == 0);
      45      1959766 :   if (size != 0) {
      46      1959206 :     memcpy(matrix->row_sizes, row_sizes, size);
      47              :   }
      48              : 
      49      1959766 :   size = ncols * sizeof(int);
      50      1959766 :   matrix->col_sizes = malloc(size);
      51      1959766 :   assert(matrix->col_sizes != NULL || size == 0);
      52      1959766 :   if (size != 0) {
      53      1955602 :     memcpy(matrix->col_sizes, col_sizes, size);
      54              :   }
      55              : 
      56      1959766 :   const int num_shards = dbm_get_num_shards(matrix);
      57      1959766 :   matrix->shards = malloc(num_shards * sizeof(dbm_shard_t));
      58      1959766 :   assert(matrix->shards != NULL || num_shards == 0);
      59      1959766 : #pragma omp parallel for
      60              :   for (int ishard = 0; ishard < num_shards; ishard++) {
      61              :     dbm_shard_init(&matrix->shards[ishard]);
      62              :   }
      63              : 
      64      1959766 :   assert(*matrix_out == NULL);
      65      1959766 :   *matrix_out = matrix;
      66      1959766 : }
      67              : 
      68              : /*******************************************************************************
      69              :  * \brief Releases a matrix and all its ressources.
      70              :  * \author Ole Schuett
      71              :  ******************************************************************************/
      72      1959766 : void dbm_release(dbm_matrix_t *matrix) {
      73      1959766 :   assert(omp_get_num_threads() == 1);
      74      3919532 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
      75      1959766 :     dbm_shard_release(&matrix->shards[ishard]);
      76              :   }
      77      1959766 :   dbm_distribution_release(matrix->dist);
      78      1959766 :   free(matrix->row_sizes);
      79      1959766 :   free(matrix->col_sizes);
      80      1959766 :   free(matrix->shards);
      81      1959766 :   free(matrix->name);
      82      1959766 :   free(matrix);
      83      1959766 : }
      84              : 
      85              : /*******************************************************************************
      86              :  * \brief Copies content of matrix_b into matrix_a.
      87              :  *        Matrices must have the same row/col block sizes and distribution.
      88              :  * \author Ole Schuett
      89              :  ******************************************************************************/
      90       489662 : void dbm_copy(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
      91       489662 :   assert(omp_get_num_threads() == 1);
      92              : 
      93       489662 :   assert(matrix_b->nrows == matrix_a->nrows);
      94      5811168 :   for (int i = 0; i < matrix_b->nrows; i++) {
      95      5321506 :     assert(matrix_b->row_sizes[i] == matrix_a->row_sizes[i]);
      96              :   }
      97       489662 :   assert(matrix_b->ncols == matrix_a->ncols);
      98     29933781 :   for (int i = 0; i < matrix_b->ncols; i++) {
      99     29444119 :     assert(matrix_b->col_sizes[i] == matrix_a->col_sizes[i]);
     100              :   }
     101              : 
     102       489662 :   assert(matrix_a->dist == matrix_b->dist);
     103              : 
     104       489662 : #pragma omp parallel for DBM_OMP_SCHEDULE
     105              :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix_a); ishard++) {
     106              :     dbm_shard_copy(&matrix_a->shards[ishard], &matrix_b->shards[ishard]);
     107              :   }
     108       489662 : }
     109              : 
     110              : /*******************************************************************************
     111              :  * \brief Copies content of matrix_b into matrix_a.
     112              :  *        Matrices may have different distributions.
     113              :  * \author Ole Schuett
     114              :  ******************************************************************************/
     115          144 : void dbm_redistribute(const dbm_matrix_t *matrix, dbm_matrix_t *redist) {
     116          144 :   assert(omp_get_num_threads() == 1);
     117          144 :   assert(matrix->nrows == redist->nrows);
     118         6384 :   for (int i = 0; i < matrix->nrows; i++) {
     119         6240 :     assert(matrix->row_sizes[i] == redist->row_sizes[i]);
     120              :   }
     121          144 :   assert(matrix->ncols == redist->ncols);
     122         6384 :   for (int i = 0; i < matrix->ncols; i++) {
     123         6240 :     assert(matrix->col_sizes[i] == redist->col_sizes[i]);
     124              :   }
     125              : 
     126          144 :   assert(cp_mpi_comms_are_similar(matrix->dist->comm, redist->dist->comm));
     127          144 :   const cp_mpi_comm_t comm = redist->dist->comm;
     128          144 :   const int nranks = cp_mpi_comm_size(comm);
     129              : 
     130              :   // 1st pass: Compute send_count.
     131          144 :   int send_count[nranks];
     132          144 :   memset(send_count, 0, nranks * sizeof(int));
     133          288 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     134          144 :     const dbm_shard_t *const shard = &matrix->shards[ishard];
     135         8936 :     for (int iblock = 0; iblock < shard->nblocks; iblock++) {
     136         8792 :       const dbm_block_t *const blk = &shard->blocks[iblock];
     137         8792 :       const int row_size = matrix->row_sizes[blk->row];
     138         8792 :       const int col_size = matrix->col_sizes[blk->col];
     139         8792 :       const int block_size = row_size * col_size;
     140         8792 :       const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
     141         8792 :       assert(0 <= rank && rank < nranks);
     142         8792 :       send_count[rank] += 2 + block_size;
     143              :     }
     144              :   }
     145              : 
     146              :   // 1st communication: Exchange counts.
     147          144 :   int recv_count[nranks];
     148          144 :   cp_mpi_alltoall_int(send_count, 1, recv_count, 1, comm);
     149              : 
     150              :   // Compute displacements and allocate data buffers.
     151          144 :   int send_displ[nranks + 1], recv_displ[nranks + 1];
     152          144 :   send_displ[0] = recv_displ[0] = 0;
     153          432 :   for (int irank = 1; irank <= nranks; irank++) {
     154          288 :     send_displ[irank] = send_displ[irank - 1] + send_count[irank - 1];
     155          288 :     recv_displ[irank] = recv_displ[irank - 1] + recv_count[irank - 1];
     156              :   }
     157          144 :   const int total_send_count = send_displ[nranks];
     158          144 :   const int total_recv_count = recv_displ[nranks];
     159          144 :   double *const data_send = cp_mpi_alloc_mem(total_send_count * sizeof(double));
     160          144 :   double *const data_recv = cp_mpi_alloc_mem(total_recv_count * sizeof(double));
     161              : 
     162              :   // 2nd pass: Fill send_data.
     163          144 :   int send_data_positions[nranks];
     164          144 :   memcpy(send_data_positions, send_displ, nranks * sizeof(int));
     165          288 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     166          144 :     const dbm_shard_t *const shard = &matrix->shards[ishard];
     167         8936 :     for (int iblock = 0; iblock < shard->nblocks; iblock++) {
     168         8792 :       const dbm_block_t *const blk = &shard->blocks[iblock];
     169         8792 :       const double *const blk_data = &shard->data[blk->offset];
     170         8792 :       const int row_size = matrix->row_sizes[blk->row];
     171         8792 :       const int col_size = matrix->col_sizes[blk->col];
     172         8792 :       const int block_size = row_size * col_size;
     173         8792 :       const int rank = dbm_get_stored_coordinates(redist, blk->row, blk->col);
     174         8792 :       const int pos = send_data_positions[rank];
     175         8792 :       data_send[pos + 0] = blk->row; // send integers as doubles
     176         8792 :       data_send[pos + 1] = blk->col;
     177         8792 :       memcpy(&data_send[pos + 2], blk_data, block_size * sizeof(double));
     178         8792 :       send_data_positions[rank] += 2 + block_size;
     179              :     }
     180              :   }
     181          432 :   for (int irank = 0; irank < nranks; irank++) {
     182          288 :     assert(send_data_positions[irank] == send_displ[irank + 1]);
     183              :   }
     184              : 
     185              :   // 2nd communication: Exchange data.
     186          144 :   cp_mpi_alltoallv_double(data_send, send_count, send_displ, data_recv,
     187              :                           recv_count, recv_displ, comm);
     188          144 :   cp_mpi_free_mem(data_send);
     189              : 
     190              :   // 3rd pass: Unpack data.
     191          144 :   dbm_clear(redist);
     192          144 :   int recv_data_pos = 0;
     193         8936 :   while (recv_data_pos < total_recv_count) {
     194         8792 :     const int row = (int)data_recv[recv_data_pos + 0];
     195         8792 :     const int col = (int)data_recv[recv_data_pos + 1];
     196         8792 :     assert(data_recv[recv_data_pos + 0] == (double)row);
     197         8792 :     assert(data_recv[recv_data_pos + 1] == (double)col);
     198         8792 :     dbm_put_block(redist, row, col, false, &data_recv[recv_data_pos + 2]);
     199         8792 :     const int row_size = matrix->row_sizes[row];
     200         8792 :     const int col_size = matrix->col_sizes[col];
     201         8792 :     const int block_size = row_size * col_size;
     202         8792 :     recv_data_pos += 2 + block_size;
     203              :   }
     204          144 :   assert(recv_data_pos == total_recv_count);
     205          144 :   cp_mpi_free_mem(data_recv);
     206          144 : }
     207              : 
     208              : /*******************************************************************************
     209              :  * \brief Looks up a block from given matrics. This routine is thread-safe.
     210              :  *        If the block is not found then a null pointer is returned.
     211              :  * \author Ole Schuett
     212              :  ******************************************************************************/
     213     23127892 : void dbm_get_block_p(dbm_matrix_t *matrix, const int row, const int col,
     214              :                      double **block, int *row_size, int *col_size) {
     215     23127892 :   assert(0 <= row && row < matrix->nrows);
     216     23127892 :   assert(0 <= col && col < matrix->ncols);
     217     23127892 :   assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
     218     23127892 :   *row_size = matrix->row_sizes[row];
     219     23127892 :   *col_size = matrix->col_sizes[col];
     220     23127892 :   *block = NULL;
     221              : 
     222     23127892 :   const int ishard = dbm_get_shard_index(matrix, row, col);
     223     23127892 :   const dbm_shard_t *const shard = &matrix->shards[ishard];
     224     23127892 :   const dbm_block_t *const blk = dbm_shard_lookup(shard, row, col);
     225     23127892 :   if (blk != NULL) {
     226     21493951 :     *block = &shard->data[blk->offset];
     227              :   }
     228     23127892 : }
     229              : 
     230              : /*******************************************************************************
     231              :  * \brief Adds a block to given matrix. This routine is thread-safe.
     232              :  *        If block already exist then it gets overwritten (or summed).
     233              :  * \author Ole Schuett
     234              :  ******************************************************************************/
     235     36873409 : void dbm_put_block(dbm_matrix_t *matrix, const int row, const int col,
     236              :                    const bool summation, const double *block) {
     237     36873409 :   assert(0 <= row && row < matrix->nrows);
     238     36873409 :   assert(0 <= col && col < matrix->ncols);
     239     36873409 :   assert(dbm_get_stored_coordinates(matrix, row, col) == matrix->dist->my_rank);
     240     36873409 :   const int row_size = matrix->row_sizes[row];
     241     36873409 :   const int col_size = matrix->col_sizes[col];
     242     36873409 :   const int block_size = row_size * col_size;
     243              : 
     244     36873409 :   const int ishard = dbm_get_shard_index(matrix, row, col);
     245     36873409 :   dbm_shard_t *const shard = &matrix->shards[ishard];
     246     36873409 :   omp_set_lock(&shard->lock);
     247     36873409 :   const dbm_block_t *const blk =
     248     36873409 :       dbm_shard_get_or_allocate_block(shard, row, col, block_size);
     249     36873409 :   double *const blk_data = &shard->data[blk->offset];
     250     36873409 :   if (summation) {
     251      3808163 :     assert(blk_data != NULL || 0 == block_size);
     252   1307511693 :     for (int i = 0; i < block_size; i++) {
     253   1303703530 :       blk_data[i] += block[i];
     254              :     }
     255     33065246 :   } else if (block_size != 0) {
     256     33060147 :     memcpy(blk_data, block, block_size * sizeof(double));
     257              :   }
     258     36873409 :   omp_unset_lock(&shard->lock);
     259     36873409 : }
     260              : 
     261              : /*******************************************************************************
     262              :  * \brief Remove all blocks from matrix, but does not release underlying memory.
     263              :  * \author Ole Schuett
     264              :  ******************************************************************************/
     265      2331774 : void dbm_clear(dbm_matrix_t *matrix) {
     266      2331774 :   assert(omp_get_num_threads() == 1);
     267              : 
     268      2331774 : #pragma omp parallel for DBM_OMP_SCHEDULE
     269              :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     270              :     dbm_shard_t *const shard = &matrix->shards[ishard];
     271              :     shard->data_promised = shard->data_size = shard->nblocks = 0;
     272              :     // Does not deallocate memory, hence data_allocated remains unchanged.
     273              :     memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
     274              :   }
     275      2331774 : }
     276              : 
     277              : /*******************************************************************************
     278              :  * \brief Removes all blocks from the matrix whose norm is below the threshold.
     279              :  *        Blocks of size zero are always kept.
     280              :  * \author Ole Schuett
     281              :  ******************************************************************************/
     282       692336 : void dbm_filter(dbm_matrix_t *matrix, const double eps) {
     283       692336 :   assert(omp_get_num_threads() == 1);
     284              : 
     285       692336 :   if (eps == 0.0) {
     286              :     return;
     287              :   }
     288       640582 :   const double eps2 = eps * eps;
     289              : 
     290       640582 : #pragma omp parallel for DBM_OMP_SCHEDULE
     291              :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     292              :     dbm_shard_t *const shard = &matrix->shards[ishard];
     293              :     const int old_nblocks = shard->nblocks;
     294              :     shard->nblocks = shard->data_promised = 0;
     295              :     memset(shard->hashtable, 0, shard->hashtable_size * sizeof(int));
     296              : 
     297              :     for (int iblock = 0; iblock < old_nblocks; iblock++) {
     298              :       const dbm_block_t old_blk = shard->blocks[iblock];
     299              :       const double *old_blk_data = &shard->data[old_blk.offset];
     300              :       const int row_size = matrix->row_sizes[old_blk.row];
     301              :       const int col_size = matrix->col_sizes[old_blk.col];
     302              :       const int block_size = row_size * col_size;
     303              :       double norm = 0.0;
     304              :       for (int i = 0; i < block_size; i++) {
     305              :         const double value = old_blk_data[i];
     306              :         norm += value * value;
     307              :         if (eps2 <= norm) {
     308              :           break;
     309              :         }
     310              :       }
     311              :       // For historic reasons zero-sized blocks are never filtered.
     312              :       if (block_size > 0 && norm < eps2) {
     313              :         continue; // filter the block
     314              :       }
     315              :       // Re-create block.
     316              :       dbm_block_t *const new_blk = dbm_shard_promise_new_block(
     317              :           shard, old_blk.row, old_blk.col, block_size);
     318              :       assert(new_blk->offset <= old_blk.offset);
     319              :       if (new_blk->offset != old_blk.offset) {
     320              :         // Using memmove because it can handle overlapping buffers.
     321              :         double *new_blk_data = &shard->data[new_blk->offset];
     322              :         memmove(new_blk_data, old_blk_data, block_size * sizeof(double));
     323              :       }
     324              :     }
     325              :     shard->data_size = shard->data_promised;
     326              :     // TODO: Could call realloc to release excess memory.
     327              :   }
     328              : }
     329              : 
     330              : /*******************************************************************************
     331              :  * \brief Adds list of blocks efficiently. The blocks will be filled with zeros.
     332              :  *        This routine must always be called within an OpenMP parallel region.
     333              :  * \author Ole Schuett
     334              :  ******************************************************************************/
     335      1630787 : void dbm_reserve_blocks(dbm_matrix_t *matrix, const int nblocks,
     336              :                         const int rows[], const int cols[]) {
     337      1630787 :   assert(omp_get_num_threads() == omp_get_max_threads() &&
     338              :          "Please call dbm_reserve_blocks within an OpenMP parallel region.");
     339      1630787 :   const int my_rank = matrix->dist->my_rank;
     340              : 
     341     32739773 :   for (int i = 0; i < nblocks; i++) {
     342     31108986 :     const int row = rows[i], col = cols[i];
     343     31108986 :     assert(0 <= row && row < matrix->nrows);
     344     31108986 :     assert(0 <= col && col < matrix->ncols);
     345     31108986 :     assert(dbm_get_stored_coordinates(matrix, row, col) == my_rank);
     346     31108986 :     const int ishard = dbm_get_shard_index(matrix, row, col);
     347     31108986 :     dbm_shard_t *const shard = &matrix->shards[ishard];
     348     31108986 :     const int row_size = matrix->row_sizes[row];
     349     31108986 :     const int col_size = matrix->col_sizes[col];
     350     31108986 :     const int block_size = row_size * col_size;
     351     31108986 :     omp_set_lock(&shard->lock);
     352     31108986 :     dbm_shard_get_or_promise_block(shard, row, col, block_size);
     353     31108986 :     omp_unset_lock(&shard->lock);
     354              :   }
     355      1630787 : #pragma omp barrier
     356              : 
     357              : #pragma omp for DBM_OMP_SCHEDULE
     358      1630787 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     359      1630787 :     dbm_shard_t *const shard = &matrix->shards[ishard];
     360      1630787 :     dbm_shard_allocate_promised_blocks(shard);
     361              :   }
     362      1630787 : }
     363              : 
     364              : /*******************************************************************************
     365              :  * \brief Multiplies all entries in the given matrix by the given factor alpha.
     366              :  * \author Ole Schuett
     367              :  ******************************************************************************/
     368       532593 : void dbm_scale(dbm_matrix_t *matrix, const double alpha) {
     369       532593 :   assert(omp_get_num_threads() == 1);
     370       532593 :   if (alpha == 1.0) {
     371              :     return;
     372              :   }
     373       407389 :   if (alpha == 0.0) {
     374       395517 :     dbm_zero(matrix);
     375       395517 :     return;
     376              :   }
     377              : 
     378        11872 : #pragma omp parallel for DBM_OMP_SCHEDULE
     379              :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     380              :     dbm_shard_t *const shard = &matrix->shards[ishard];
     381              :     for (int i = 0; i < shard->data_size; i++) {
     382              :       shard->data[i] *= alpha;
     383              :     }
     384              :   }
     385              : }
     386              : 
     387              : /*******************************************************************************
     388              :  * \brief Sets all blocks in the given matrix to zero.
     389              :  * \author Ole Schuett
     390              :  ******************************************************************************/
     391       395517 : void dbm_zero(dbm_matrix_t *matrix) {
     392       395517 :   assert(omp_get_num_threads() == 1);
     393              : 
     394       395517 : #pragma omp parallel for DBM_OMP_SCHEDULE
     395              :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     396              :     dbm_shard_t *const shard = &matrix->shards[ishard];
     397              :     if (shard->data != NULL) {
     398              :       memset(shard->data, 0, shard->data_size * sizeof(double));
     399              :     }
     400              :   }
     401       395517 : }
     402              : 
     403              : /*******************************************************************************
     404              :  * \brief Adds matrix_b to matrix_a.
     405              :  * \author Ole Schuett
     406              :  ******************************************************************************/
     407       242460 : void dbm_add(dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
     408       242460 :   assert(omp_get_num_threads() == 1);
     409       242460 :   assert(matrix_a->dist == matrix_b->dist);
     410              : 
     411       242460 : #pragma omp parallel for DBM_OMP_SCHEDULE
     412              :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix_b); ishard++) {
     413              :     dbm_shard_t *const shard_a = &matrix_a->shards[ishard];
     414              :     const dbm_shard_t *shard_b = &matrix_b->shards[ishard];
     415              :     for (int iblock = 0; iblock < shard_b->nblocks; iblock++) {
     416              :       const dbm_block_t blk_b = shard_b->blocks[iblock];
     417              :       const int row_size = matrix_b->row_sizes[blk_b.row];
     418              :       const int col_size = matrix_b->col_sizes[blk_b.col];
     419              :       assert(row_size == matrix_a->row_sizes[blk_b.row]);
     420              :       assert(col_size == matrix_a->col_sizes[blk_b.col]);
     421              :       const int block_size = row_size * col_size;
     422              :       const dbm_block_t *const blk_a = dbm_shard_get_or_allocate_block(
     423              :           shard_a, blk_b.row, blk_b.col, block_size);
     424              :       double *const data_a = &shard_a->data[blk_a->offset];
     425              :       const double *const data_b = &shard_b->data[blk_b.offset];
     426              :       for (int i = 0; i < block_size; i++) {
     427              :         data_a[i] += data_b[i];
     428              :       }
     429              :     }
     430              :   }
     431       242460 : }
     432              : 
     433              : /*******************************************************************************
     434              :  * \brief Creates an iterator for the blocks of the given matrix.
     435              :  *        The iteration order is not stable.
     436              :  *        This routine must always be called within an OpenMP parallel region.
     437              :  * \author Ole Schuett
     438              :  ******************************************************************************/
     439      3868855 : void dbm_iterator_start(dbm_iterator_t **iter_out, const dbm_matrix_t *matrix) {
     440      3868855 :   assert(omp_get_num_threads() == omp_get_max_threads() &&
     441              :          "Please call dbm_iterator_start within an OpenMP parallel region.");
     442      3868855 :   dbm_iterator_t *iter = malloc(sizeof(dbm_iterator_t));
     443      3868855 :   assert(iter != NULL);
     444      3868855 :   iter->matrix = matrix;
     445      3868855 :   iter->next_block = 0;
     446      3868855 :   iter->next_shard = omp_get_thread_num();
     447      4500302 :   while (iter->next_shard < dbm_get_num_shards(matrix) &&
     448      3868855 :          matrix->shards[iter->next_shard].nblocks == 0) {
     449       631447 :     iter->next_shard += omp_get_num_threads();
     450              :   }
     451      3868855 :   assert(*iter_out == NULL);
     452      3868855 :   *iter_out = iter;
     453      3868855 : }
     454              : 
     455              : /*******************************************************************************
     456              :  * \brief Returns number of blocks the iterator will provide to calling thread.
     457              :  * \author Ole Schuett
     458              :  ******************************************************************************/
     459       717096 : int dbm_iterator_num_blocks(const dbm_iterator_t *iter) {
     460       717096 :   int num_blocks = 0;
     461       717096 :   for (int ishard = omp_get_thread_num();
     462      1434192 :        ishard < dbm_get_num_shards(iter->matrix);
     463       717096 :        ishard += omp_get_num_threads()) {
     464       717096 :     num_blocks += iter->matrix->shards[ishard].nblocks;
     465              :   }
     466       717096 :   return num_blocks;
     467              : }
     468              : 
     469              : /*******************************************************************************
     470              :  * \brief Tests whether the given iterator has any block left.
     471              :  * \author Ole Schuett
     472              :  ******************************************************************************/
     473     64949272 : bool dbm_iterator_blocks_left(const dbm_iterator_t *iter) {
     474     64949272 :   return iter->next_shard < dbm_get_num_shards(iter->matrix);
     475              : }
     476              : 
     477              : /*******************************************************************************
     478              :  * \brief Returns the next block from the given iterator.
     479              :  * \author Ole Schuett
     480              :  ******************************************************************************/
     481     72083322 : void dbm_iterator_next_block(dbm_iterator_t *iter, int *row, int *col,
     482              :                              double **block, int *row_size, int *col_size) {
     483     72083322 :   const dbm_matrix_t *matrix = iter->matrix;
     484     72083322 :   assert(iter->next_shard < dbm_get_num_shards(matrix));
     485     72083322 :   const dbm_shard_t *shard = &matrix->shards[iter->next_shard];
     486     72083322 :   assert(iter->next_block < shard->nblocks);
     487     72083322 :   dbm_block_t *blk = &shard->blocks[iter->next_block];
     488              : 
     489     72083322 :   *row = blk->row;
     490     72083322 :   *col = blk->col;
     491     72083322 :   *row_size = matrix->row_sizes[blk->row];
     492     72083322 :   *col_size = matrix->col_sizes[blk->col];
     493     72083322 :   *block = &shard->data[blk->offset];
     494              : 
     495     72083322 :   iter->next_block++;
     496     72083322 :   if (iter->next_block >= shard->nblocks) {
     497              :     // Advance to the next non-empty shard...
     498      3237408 :     iter->next_shard += omp_get_num_threads();
     499      3237408 :     while (iter->next_shard < dbm_get_num_shards(matrix) &&
     500            0 :            matrix->shards[iter->next_shard].nblocks == 0) {
     501            0 :       iter->next_shard += omp_get_num_threads();
     502              :     }
     503      3237408 :     iter->next_block = 0; // ...and continue with its first block.
     504              :   }
     505     72083322 : }
     506              : 
     507              : /*******************************************************************************
     508              :  * \brief Releases the given iterator.
     509              :  * \author Ole Schuett
     510              :  ******************************************************************************/
     511      3868855 : void dbm_iterator_stop(dbm_iterator_t *iter) { free(iter); }
     512              : 
     513              : /*******************************************************************************
     514              :  * \brief Private routine to accumulate using Kahan's summation.
     515              :  * \author Hans Pabst
     516              :  ******************************************************************************/
     517     14383468 : static double kahan_sum(double value, double *accumulator,
     518              :                         double *compensation) {
     519     14383468 :   double r, c;
     520     14383468 :   assert(NULL != accumulator && NULL != compensation);
     521     14383468 :   c = value - *compensation;
     522     14383468 :   r = *accumulator + c;
     523     14383468 :   *compensation = (r - *accumulator) - c;
     524     14383468 :   *accumulator = r;
     525     14383468 :   return r;
     526              : }
     527              : 
     528              : /*******************************************************************************
     529              :  * \brief Computes a checksum of the given matrix.
     530              :  * \author Ole Schuett
     531              :  ******************************************************************************/
     532          190 : double dbm_checksum(const dbm_matrix_t *matrix) {
     533          190 :   double checksum = 0.0, compensation = 0.0;
     534          380 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     535          190 :     const dbm_shard_t *shard = &matrix->shards[ishard];
     536     14383658 :     for (int i = 0; i < shard->data_size; i++) {
     537     14383468 :       const double value = shard->data[i];
     538     14383468 :       kahan_sum(value * value, &checksum, &compensation);
     539              :     }
     540              :   }
     541          190 :   cp_mpi_sum_double(&checksum, 1, matrix->dist->comm);
     542          190 :   return checksum;
     543              : }
     544              : 
     545              : /*******************************************************************************
     546              :  * \brief Returns the largest absolute value of all matrix elements.
     547              :  * \author Ole Schuett
     548              :  ******************************************************************************/
     549           48 : double dbm_maxabs(const dbm_matrix_t *matrix) {
     550           48 :   double maxabs = 0.0;
     551           96 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     552           48 :     const dbm_shard_t *shard = &matrix->shards[ishard];
     553      1599050 :     for (int i = 0; i < shard->data_size; i++) {
     554      1599002 :       maxabs = fmax(maxabs, fabs(shard->data[i]));
     555              :     }
     556              :   }
     557           48 :   cp_mpi_max_double(&maxabs, 1, matrix->dist->comm);
     558           48 :   return maxabs;
     559              : }
     560              : 
     561              : /*******************************************************************************
     562              :  * \brief Calculates maximum relative difference between matrix_a and matrix_b.
     563              :  * \author Hans Pabst
     564              :  ******************************************************************************/
     565            0 : double dbm_maxeps(const dbm_matrix_t *matrix_a, const dbm_matrix_t *matrix_b) {
     566            0 :   const int num_shards = dbm_get_num_shards(matrix_a);
     567            0 :   double epsilon = 0;
     568              : 
     569            0 :   assert(omp_get_num_threads() == 1);
     570            0 :   assert(matrix_a->dist == matrix_b->dist);
     571            0 :   assert(num_shards == dbm_get_num_shards(matrix_b));
     572              : 
     573              :   // Handles asymmetric sparsity: blocks in A but not B are compared against
     574              :   // zero, and blocks in B but not A are checked in a second pass. The
     575              :   // reduction(max) avoids the previous data race on the shared epsilon.
     576            0 : #pragma omp parallel for reduction(max : epsilon) DBM_OMP_SCHEDULE
     577              :   for (int ishard = 0; ishard < num_shards; ++ishard) {
     578              :     const dbm_shard_t *const shard_a = &matrix_a->shards[ishard];
     579              :     const dbm_shard_t *const shard_b = &matrix_b->shards[ishard];
     580              :     for (int iblock = 0; iblock < shard_a->nblocks; ++iblock) {
     581              :       const dbm_block_t *const blk_a = &shard_a->blocks[iblock];
     582              :       const dbm_block_t *const blk_b =
     583              :           dbm_shard_lookup(shard_b, blk_a->row, blk_a->col);
     584              :       const int row_size = matrix_a->row_sizes[blk_a->row];
     585              :       const int col_size = matrix_a->col_sizes[blk_a->col];
     586              :       assert(NULL == blk_b || row_size == matrix_b->row_sizes[blk_b->row]);
     587              :       assert(NULL == blk_b || col_size == matrix_b->col_sizes[blk_b->col]);
     588              :       const double *const data_a = &shard_a->data[blk_a->offset];
     589              :       const double *const data_b =
     590              :           (NULL == blk_b) ? NULL : &shard_b->data[blk_b->offset];
     591              :       const int block_size = row_size * col_size;
     592              :       for (int i = 0; i < block_size; ++i) {
     593              :         const double d = data_a[i] - (NULL == data_b ? 0.0 : data_b[i]);
     594              :         const double e = fabs(0 != data_a[i] ? (d / data_a[i]) : d);
     595              :         if (epsilon < e) {
     596              :           epsilon = e;
     597              :         }
     598              :       }
     599              :     }
     600              :     // Second pass: blocks present in B but absent from A (treat A as zero).
     601              :     for (int iblock = 0; iblock < shard_b->nblocks; ++iblock) {
     602              :       const dbm_block_t *const blk_b = &shard_b->blocks[iblock];
     603              :       if (NULL != dbm_shard_lookup(shard_a, blk_b->row, blk_b->col)) {
     604              :         continue;
     605              :       }
     606              :       const int row_size = matrix_b->row_sizes[blk_b->row];
     607              :       const int col_size = matrix_b->col_sizes[blk_b->col];
     608              :       const double *const data_b = &shard_b->data[blk_b->offset];
     609              :       const int block_size = row_size * col_size;
     610              :       for (int i = 0; i < block_size; ++i) {
     611              :         const double e = fabs(data_b[i]);
     612              :         if (epsilon < e) {
     613              :           epsilon = e;
     614              :         }
     615              :       }
     616              :     }
     617              :   }
     618              : 
     619            0 :   cp_mpi_max_double(&epsilon, 1, matrix_a->dist->comm);
     620            0 :   return epsilon;
     621              : }
     622              : 
     623              : /*******************************************************************************
     624              :  * \brief Returns the name of the matrix of the given matrix.
     625              :  * \author Ole Schuett
     626              :  ******************************************************************************/
     627      2016589 : const char *dbm_get_name(const dbm_matrix_t *matrix) { return matrix->name; }
     628              : 
     629              : /*******************************************************************************
     630              :  * \brief Returns the number of local Non-Zero Elements of the given matrix.
     631              :  * \author Ole Schuett
     632              :  ******************************************************************************/
     633      2059255 : int dbm_get_nze(const dbm_matrix_t *matrix) {
     634      2059255 :   int nze = 0;
     635      4118510 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     636      2059255 :     nze += matrix->shards[ishard].data_size;
     637              :   }
     638      2059255 :   return nze;
     639              : }
     640              : 
     641              : /*******************************************************************************
     642              :  * \brief Returns the number of local blocks of the given matrix.
     643              :  * \author Ole Schuett
     644              :  ******************************************************************************/
     645      1080392 : int dbm_get_num_blocks(const dbm_matrix_t *matrix) {
     646      1080392 :   int nblocks = 0;
     647      2160784 :   for (int ishard = 0; ishard < dbm_get_num_shards(matrix); ishard++) {
     648      1080392 :     nblocks += matrix->shards[ishard].nblocks;
     649              :   }
     650      1080392 :   return nblocks;
     651              : }
     652              : 
     653              : /*******************************************************************************
     654              :  * \brief Returns the row block sizes of the given matrix.
     655              :  * \author Ole Schuett
     656              :  ******************************************************************************/
     657      4384814 : void dbm_get_row_sizes(const dbm_matrix_t *matrix, int *nrows,
     658              :                        const int **row_sizes) {
     659      4384814 :   *nrows = matrix->nrows;
     660      4384814 :   *row_sizes = matrix->row_sizes;
     661      4384814 : }
     662              : 
     663              : /*******************************************************************************
     664              :  * \brief Returns the column block sizes of the given matrix.
     665              :  * \author Ole Schuett
     666              :  ******************************************************************************/
     667      2917120 : void dbm_get_col_sizes(const dbm_matrix_t *matrix, int *ncols,
     668              :                        const int **col_sizes) {
     669      2917120 :   *ncols = matrix->ncols;
     670      2917120 :   *col_sizes = matrix->col_sizes;
     671      2917120 : }
     672              : 
     673              : /*******************************************************************************
     674              :  * \brief Returns the local row block sizes of the given matrix.
     675              :  * \author Ole Schuett
     676              :  ******************************************************************************/
     677       309492 : void dbm_get_local_rows(const dbm_matrix_t *matrix, int *nlocal_rows,
     678              :                         const int **local_rows) {
     679       309492 :   *nlocal_rows = matrix->dist->rows.nlocals;
     680       309492 :   *local_rows = matrix->dist->rows.local_indicies;
     681       309492 : }
     682              : 
     683              : /*******************************************************************************
     684              :  * \brief Returns the local column block sizes of the given matrix.
     685              :  * \author Ole Schuett
     686              :  ******************************************************************************/
     687       138352 : void dbm_get_local_cols(const dbm_matrix_t *matrix, int *nlocal_cols,
     688              :                         const int **local_cols) {
     689       138352 :   *nlocal_cols = matrix->dist->cols.nlocals;
     690       138352 :   *local_cols = matrix->dist->cols.local_indicies;
     691       138352 : }
     692              : 
     693              : /*******************************************************************************
     694              :  * \brief Returns the MPI rank on which the given block should be stored.
     695              :  * \author Ole Schuett
     696              :  ******************************************************************************/
     697     99289745 : int dbm_get_stored_coordinates(const dbm_matrix_t *matrix, const int row,
     698              :                                const int col) {
     699     99289745 :   return dbm_distribution_stored_coords(matrix->dist, row, col);
     700              : }
     701              : 
     702              : /*******************************************************************************
     703              :  * \brief Returns the distribution of the given matrix.
     704              :  * \author Ole Schuett
     705              :  ******************************************************************************/
     706      1419064 : const dbm_distribution_t *dbm_get_distribution(const dbm_matrix_t *matrix) {
     707      1419064 :   return matrix->dist;
     708              : }
     709              : 
     710              : // EOF
        

Generated by: LCOV version 2.0-1