LCOV - code coverage report
Current view: top level - src/grid/dgemm - grid_dgemm_context.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:42dac4a) Lines: 77.6 % 339 263
Test Date: 2025-07-25 12:55:17 Functions: 61.9 % 21 13

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : 
       8              : #include <math.h>
       9              : #include <omp.h>
      10              : #include <stdio.h>
      11              : #include <stdlib.h>
      12              : #include <string.h>
      13              : 
      14              : #include "../common/grid_library.h"
      15              : #include "grid_dgemm_collocate.h"
      16              : #include "grid_dgemm_collocation_integration.h"
      17              : #include "grid_dgemm_context.h"
      18              : #include "grid_dgemm_private_header.h"
      19              : #include "grid_dgemm_task_list.h"
      20              : #include "grid_dgemm_tensor_local.h"
      21              : #include "grid_dgemm_utils.h"
      22              : 
      23            0 : void return_dh(void *const ptr, const int level, double *const dh) {
      24            0 :   grid_context *const ctx = (grid_context *)ptr;
      25              : 
      26            0 :   assert(ctx->checksum == ctx_checksum);
      27            0 :   dh[0] = ctx->grid[level].dh[0][0];
      28            0 :   dh[1] = ctx->grid[level].dh[0][1];
      29            0 :   dh[2] = ctx->grid[level].dh[0][2];
      30            0 :   dh[3] = ctx->grid[level].dh[1][0];
      31            0 :   dh[4] = ctx->grid[level].dh[1][1];
      32            0 :   dh[5] = ctx->grid[level].dh[1][2];
      33            0 :   dh[6] = ctx->grid[level].dh[2][0];
      34            0 :   dh[7] = ctx->grid[level].dh[2][1];
      35            0 :   dh[8] = ctx->grid[level].dh[2][2];
      36            0 : }
      37              : 
      38            0 : void return_dh_inv(void *const ptr, const int level, double *const dh_inv) {
      39            0 :   grid_context *const ctx = (grid_context *)ptr;
      40              : 
      41            0 :   assert(ctx->checksum == ctx_checksum);
      42            0 :   dh_inv[0] = ctx->grid[level].dh_inv[0][0];
      43            0 :   dh_inv[1] = ctx->grid[level].dh_inv[0][1];
      44            0 :   dh_inv[2] = ctx->grid[level].dh_inv[0][2];
      45            0 :   dh_inv[3] = ctx->grid[level].dh_inv[1][0];
      46            0 :   dh_inv[4] = ctx->grid[level].dh_inv[1][1];
      47            0 :   dh_inv[5] = ctx->grid[level].dh_inv[1][2];
      48            0 :   dh_inv[6] = ctx->grid[level].dh_inv[2][0];
      49            0 :   dh_inv[7] = ctx->grid[level].dh_inv[2][1];
      50            0 :   dh_inv[8] = ctx->grid[level].dh_inv[2][2];
      51            0 : }
      52              : 
      53            0 : int return_num_devs(void *const ptr) {
      54            0 :   grid_context *const ctx = (grid_context *)ptr;
      55            0 :   assert(ctx->checksum == ctx_checksum);
      56              : 
      57            0 :   return ctx->number_of_devices;
      58              : }
      59              : 
      60            0 : int return_device_id(void *const ptr, const int device) {
      61            0 :   grid_context *const ctx = (grid_context *)ptr;
      62            0 :   assert(ctx->checksum == ctx_checksum);
      63              : 
      64            0 :   return ctx->device_id[device];
      65              : }
      66              : 
      67            0 : int is_grid_orthorhombic(void *const ptr) {
      68            0 :   grid_context *const ctx = (grid_context *)ptr;
      69            0 :   assert(ctx->checksum == ctx_checksum);
      70            0 :   return ctx->orthorhombic;
      71              : }
      72              : 
      73            0 : void update_queue_length(void *const ptr, const int queue_length) {
      74            0 :   grid_context *const ctx = (grid_context *)ptr;
      75            0 :   assert(ctx->checksum == ctx_checksum);
      76            0 :   ctx->queue_length = queue_length;
      77            0 : }
      78              : 
      79           20 : void update_atoms_position(const int natoms,
      80              :                            const double atoms_positions[natoms][3],
      81              :                            grid_context *data) {
      82           20 :   assert(data != NULL);
      83              : 
      84           20 :   if (natoms == 0)
      85              :     return;
      86              : 
      87           20 :   if (data->atom_positions == NULL) {
      88            8 :     data->atom_positions = malloc(3 * natoms * sizeof(double));
      89              :   } else {
      90           12 :     if (natoms > data->natoms) {
      91            0 :       data->atom_positions =
      92            0 :           realloc(data->atom_positions, 3 * natoms * sizeof(double));
      93              :     }
      94              :   }
      95           20 :   assert(data->atom_positions != NULL);
      96              : 
      97           20 :   data->natoms = natoms;
      98              : 
      99           20 :   if (data->atom_positions) {
     100           78 :     for (int i = 0; i < natoms; i++) {
     101           58 :       data->atom_positions[3 * i] = atoms_positions[i][0];
     102           58 :       data->atom_positions[3 * i + 1] = atoms_positions[i][1];
     103           58 :       data->atom_positions[3 * i + 2] = atoms_positions[i][2];
     104              :     }
     105              :   }
     106              : }
     107              : 
     108           20 : void update_atoms_kinds(const int natoms, const int *atoms_kinds,
     109              :                         grid_context *data) {
     110           20 :   assert(data != NULL);
     111              : 
     112              :   // data->atom_kinds is a table that give the type of a given atom.
     113           20 :   if (natoms == 0)
     114              :     return;
     115              : 
     116           20 :   if (data->atom_kinds == NULL) {
     117            8 :     data->atom_kinds = malloc(natoms * sizeof(int));
     118              :   } else {
     119           12 :     if ((natoms > data->natoms) && (data->natoms > 0)) {
     120            0 :       data->atom_kinds = realloc(data->atom_kinds, natoms * sizeof(int));
     121              :     }
     122              :   }
     123           20 :   assert(data->atom_kinds != NULL);
     124              :   // data->natoms is initialized before calling this function
     125           20 :   if (data->natoms)
     126           20 :     memcpy(data->atom_kinds, atoms_kinds, sizeof(int) * natoms);
     127              : 
     128           78 :   for (int i = 0; i < natoms; i++) {
     129           58 :     data->atom_kinds[i] -= 1;
     130              :   }
     131              : }
     132              : 
     133           20 : void update_block_offsets(const int nblocks, const int *const block_offsets,
     134              :                           grid_context *data) {
     135           20 :   assert(data != NULL);
     136              : 
     137           20 :   if (nblocks == 0)
     138              :     return;
     139              : 
     140           19 :   if (data->block_offsets == NULL) {
     141            7 :     data->block_offsets = malloc(nblocks * sizeof(int));
     142              :   } else {
     143           12 :     if ((nblocks > data->nblocks_total) && (data->nblocks_total > 0)) {
     144            0 :       data->block_offsets = realloc(data->block_offsets, sizeof(int) * nblocks);
     145              :     }
     146              :   }
     147           19 :   assert(data->block_offsets != NULL);
     148              : 
     149           19 :   data->nblocks = nblocks;
     150           19 :   data->nblocks_total = imax(data->nblocks_total, nblocks);
     151           19 :   if (nblocks)
     152           19 :     memcpy(data->block_offsets, block_offsets, nblocks * sizeof(int));
     153              : }
     154              : 
     155           20 : void update_basis_set(const int nkinds, const grid_basis_set **const basis_sets,
     156              :                       grid_context *data) {
     157           20 :   if (nkinds > data->nkinds_total) {
     158            8 :     if (data->basis_sets == NULL) {
     159            8 :       data->basis_sets = malloc(nkinds * sizeof(grid_basis_set *));
     160              :     } else {
     161            0 :       data->basis_sets =
     162            0 :           realloc(data->basis_sets, nkinds * sizeof(grid_basis_set *));
     163              :     }
     164              :   }
     165           20 :   assert(data->basis_sets != NULL);
     166           20 :   data->nkinds = nkinds;
     167           20 :   data->nkinds_total = imax(data->nkinds_total, nkinds);
     168           20 :   memcpy(data->basis_sets, basis_sets, nkinds * sizeof(grid_basis_set *));
     169           20 : }
     170              : 
     171           20 : void update_task_lists(const int nlevels, const int ntasks,
     172              :                        const int *const level_list, const int *const iatom_list,
     173              :                        const int *const jatom_list, const int *const iset_list,
     174              :                        const int *const jset_list, const int *const ipgf_list,
     175              :                        const int *const jpgf_list,
     176              :                        const int *const border_mask_list,
     177              :                        const int *block_num_list,
     178              :                        const double *const radius_list,
     179              :                        const double rab_list[ntasks][3], grid_context *ctx) {
     180              : 
     181           20 :   assert(ctx->checksum == ctx_checksum);
     182              : 
     183           20 :   if (nlevels == 0)
     184              :     return;
     185              : 
     186           20 :   if (ctx->ntasks == 0) {
     187              :     // Count tasks per level.
     188            8 :     size_t size = nlevels * sizeof(int);
     189            8 :     ctx->tasks_per_level = malloc(size);
     190            8 :     ctx->tasks = malloc(nlevels * sizeof(_task *));
     191              :     /* memset(ctx->tasks, 0, nlevels * sizeof(_task *)); */
     192            8 :     if (ntasks)
     193            7 :       ctx->tasks[0] = malloc(ntasks * sizeof(_task));
     194              :     else
     195            1 :       ctx->tasks[0] = NULL;
     196              :   } else {
     197           12 :     if (ctx->nlevels_total < nlevels) {
     198              :       /* save the address of the full task list. NULL when completly empty */
     199            0 :       ctx->tasks = realloc(ctx->tasks, nlevels * sizeof(_task *));
     200            0 :       assert(ctx->tasks != NULL);
     201              :     }
     202           12 :     if (ctx->ntasks_total < ntasks) {
     203            0 :       ctx->tasks[0] = realloc(ctx->tasks[0], ntasks * sizeof(_task));
     204            0 :       assert(ctx->tasks[0] != NULL);
     205              :     }
     206              :   }
     207              : 
     208           20 :   memset(ctx->tasks_per_level, 0, nlevels * sizeof(int));
     209           20 :   ctx->nlevels = nlevels;
     210           20 :   ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
     211           20 :   ctx->ntasks_total = imax(ctx->ntasks_total, ntasks);
     212           20 :   ctx->ntasks = ntasks;
     213              : 
     214         5793 :   for (int i = 0; i < ntasks; i++) {
     215         5773 :     ctx->tasks_per_level[level_list[i] - 1]++;
     216         5773 :     assert(i == 0 || level_list[i] >= level_list[i - 1]); // expect ordered list
     217              :   }
     218              : 
     219           80 :   for (int i = 1; i < ctx->nlevels; i++) {
     220           60 :     ctx->tasks[i] = ctx->tasks[i - 1] + ctx->tasks_per_level[i - 1];
     221              :   }
     222              : 
     223           20 :   int prev_block_num = -1;
     224           20 :   int prev_iset = -1;
     225           20 :   int prev_jset = -1;
     226           20 :   int prev_level = -1;
     227           20 :   _task *task = ctx->tasks[0];
     228         5793 :   for (int i = 0; i < ntasks; i++) {
     229         5773 :     if (prev_level != (level_list[i] - 1)) {
     230           57 :       prev_level = level_list[i] - 1;
     231           57 :       prev_block_num = -1;
     232           57 :       prev_iset = -1;
     233           57 :       prev_jset = -1;
     234              :     }
     235         5773 :     task->level = level_list[i] - 1;
     236         5773 :     task->iatom = iatom_list[i] - 1;
     237         5773 :     task->jatom = jatom_list[i] - 1;
     238         5773 :     task->iset = iset_list[i] - 1;
     239         5773 :     task->jset = jset_list[i] - 1;
     240         5773 :     task->ipgf = ipgf_list[i] - 1;
     241         5773 :     task->jpgf = jpgf_list[i] - 1;
     242         5773 :     task->border_mask = border_mask_list[i];
     243         5773 :     task->block_num = block_num_list[i] - 1;
     244         5773 :     task->radius = radius_list[i];
     245         5773 :     task->rab[0] = rab_list[i][0];
     246         5773 :     task->rab[1] = rab_list[i][1];
     247         5773 :     task->rab[2] = rab_list[i][2];
     248         5773 :     const int iatom = task->iatom;
     249         5773 :     const int jatom = task->jatom;
     250         5773 :     const int iset = task->iset;
     251         5773 :     const int jset = task->jset;
     252         5773 :     const int ipgf = task->ipgf;
     253         5773 :     const int jpgf = task->jpgf;
     254         5773 :     const int ikind = ctx->atom_kinds[iatom];
     255         5773 :     const int jkind = ctx->atom_kinds[jatom];
     256         5773 :     const grid_basis_set *ibasis = ctx->basis_sets[ikind];
     257         5773 :     const grid_basis_set *jbasis = ctx->basis_sets[jkind];
     258         5773 :     const int ncoseta = ncoset(ibasis->lmax[iset]);
     259         5773 :     const int ncosetb = ncoset(jbasis->lmax[jset]);
     260              : 
     261         5773 :     task->zeta[0] = ibasis->zet[iset * ibasis->maxpgf + ipgf];
     262         5773 :     task->zeta[1] = jbasis->zet[jset * jbasis->maxpgf + jpgf];
     263              : 
     264         5773 :     const double *ra = &ctx->atom_positions[3 * iatom];
     265         5773 :     const double zetp = task->zeta[0] + task->zeta[1];
     266         5773 :     const double f = task->zeta[1] / zetp;
     267         5773 :     const double rab2 = task->rab[0] * task->rab[0] +
     268         5773 :                         task->rab[1] * task->rab[1] +
     269         5773 :                         task->rab[2] * task->rab[2];
     270              : 
     271         5773 :     task->prefactor = exp(-task->zeta[0] * f * rab2);
     272         5773 :     task->zetp = zetp;
     273              : 
     274         5773 :     const int block_num = task->block_num;
     275              : 
     276        23092 :     for (int i = 0; i < 3; i++) {
     277        17319 :       task->ra[i] = ra[i];
     278        17319 :       task->rp[i] = ra[i] + f * task->rab[i];
     279        17319 :       task->rb[i] = ra[i] + task->rab[i];
     280              :     }
     281              : 
     282         5773 :     task->lmax[0] = ibasis->lmax[iset];
     283         5773 :     task->lmax[1] = jbasis->lmax[jset];
     284         5773 :     task->lmin[0] = ibasis->lmin[iset];
     285         5773 :     task->lmin[1] = jbasis->lmin[jset];
     286              : 
     287         5773 :     if ((block_num != prev_block_num) || (iset != prev_iset) ||
     288              :         (jset != prev_jset)) {
     289          558 :       task->update_block_ = true;
     290          558 :       prev_block_num = block_num;
     291          558 :       prev_iset = iset;
     292          558 :       prev_jset = jset;
     293              :     } else {
     294         5215 :       task->update_block_ = false;
     295              :     }
     296              : 
     297         5773 :     task->offset[0] = ipgf * ncoseta;
     298         5773 :     task->offset[1] = jpgf * ncosetb;
     299         5773 :     task++;
     300              :   }
     301              : 
     302              :   // Find largest Cartesian subblock size.
     303           20 :   ctx->maxco = 0;
     304           56 :   for (int i = 0; i < ctx->nkinds; i++) {
     305           36 :     ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
     306              :   }
     307              : }
     308              : 
     309           20 : void update_layouts(const int nlevels, const int npts_global[nlevels][3],
     310              :                     const int npts_local[nlevels][3],
     311              :                     const int shift_local[nlevels][3],
     312              :                     const int border_width[nlevels][3],
     313              :                     const double dh[nlevels][3][3],
     314              :                     const double dh_inv[nlevels][3][3], grid_context *ctx) {
     315              : 
     316           20 :   assert(ctx != NULL);
     317           20 :   assert(ctx->checksum == ctx_checksum);
     318              : 
     319           20 :   if (ctx->layouts != NULL) {
     320           12 :     free(ctx->layouts);
     321              :   }
     322              : 
     323           20 :   ctx->layouts = malloc(sizeof(_layout) * nlevels);
     324              : 
     325          100 :   for (int level = 0; level < nlevels; level++) {
     326          320 :     for (int i = 0; i < 3; i++) {
     327          240 :       ctx->layouts[level].npts_global[i] = npts_global[level][i];
     328          240 :       ctx->layouts[level].npts_local[i] = npts_local[level][i];
     329          240 :       ctx->layouts[level].shift_local[i] = shift_local[level][i];
     330          240 :       ctx->layouts[level].border_width[i] = border_width[level][i];
     331          960 :       for (int j = 0; j < 3; j++) {
     332          720 :         ctx->layouts[level].dh[i][j] = dh[level][i][j];
     333          720 :         ctx->layouts[level].dh_inv[i][j] = dh_inv[level][i][j];
     334              :       }
     335              :     }
     336              :   }
     337           20 : }
     338              : 
     339           20 : void update_grid(const int nlevels, grid_context *ctx) {
     340           20 :   assert(ctx != NULL);
     341           20 :   assert(ctx->checksum == ctx_checksum);
     342              : 
     343           20 :   if (nlevels == 0)
     344              :     return;
     345              : 
     346           20 :   if (ctx->grid == NULL) {
     347            8 :     ctx->grid = malloc(sizeof(tensor) * nlevels);
     348              :   } else {
     349           12 :     if (ctx->nlevels_total < nlevels) {
     350            0 :       ctx->grid = realloc(ctx->grid, sizeof(tensor) * nlevels);
     351              :     }
     352              :   }
     353           20 :   assert(ctx->grid != NULL);
     354              : 
     355           20 :   ctx->nlevels_total = imax(ctx->nlevels_total, nlevels);
     356           20 :   ctx->nlevels = nlevels;
     357              : }
     358              : 
     359            8 : void *create_grid_context_dgemm(
     360              :     const bool orthorhombic, const int ntasks, const int nlevels,
     361              :     const int natoms, const int nkinds, const int nblocks,
     362              :     const int *block_offsets, const double atom_positions[natoms][3],
     363              :     const int *const atom_kinds, const grid_basis_set **const basis_sets,
     364              :     const int *const level_list, const int *const iatom_list,
     365              :     const int *jatom_list, const int *const iset_list,
     366              :     const int *const jset_list, const int *const ipgf_list,
     367              :     const int *const jpgf_list, const int *const border_mask_list,
     368              :     const int *block_num_list, const double *const radius_list,
     369              :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
     370              :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
     371              :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
     372              :     const double dh_inv[nlevels][3][3]) {
     373              : 
     374            8 :   grid_context *ctx = malloc(sizeof(grid_context));
     375              : 
     376            8 :   memset(ctx, 0, sizeof(grid_context));
     377              : 
     378            8 :   ctx->checksum = ctx_checksum;
     379            8 :   ctx->orthorhombic = orthorhombic;
     380            8 :   update_block_offsets(nblocks, block_offsets, ctx);
     381            8 :   update_atoms_position(natoms, atom_positions, ctx);
     382            8 :   update_atoms_kinds(natoms, atom_kinds, ctx);
     383            8 :   update_basis_set(nkinds, basis_sets, ctx);
     384            8 :   update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
     385              :                     iset_list, jset_list, ipgf_list, jpgf_list,
     386              :                     border_mask_list, block_num_list, radius_list, rab_list,
     387              :                     ctx);
     388            8 :   update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
     389              :                  dh, dh_inv, ctx);
     390            8 :   update_grid(nlevels, ctx);
     391              : 
     392            8 :   const int max_threads = omp_get_max_threads();
     393              : 
     394            8 :   ctx->handler =
     395            8 :       malloc(sizeof(struct collocation_integration_ *) * max_threads);
     396              : 
     397           16 :   for (int i = 0; i < max_threads; i++) {
     398            8 :     ctx->handler[i] = collocate_create_handle();
     399              :   }
     400              : 
     401            8 :   ctx->number_of_handler = max_threads;
     402              : 
     403            8 :   return ctx;
     404              : }
     405              : 
     406           12 : void update_grid_context_dgemm(
     407              :     const bool orthorhombic, const int ntasks, const int nlevels,
     408              :     const int natoms, const int nkinds, const int nblocks,
     409              :     const int *block_offsets, const double atom_positions[natoms][3],
     410              :     const int *const atom_kinds, const grid_basis_set **const basis_sets,
     411              :     const int *const level_list, const int *const iatom_list,
     412              :     const int *jatom_list, const int *const iset_list,
     413              :     const int *const jset_list, const int *const ipgf_list,
     414              :     const int *const jpgf_list, const int *const border_mask_list,
     415              :     const int *block_num_list, const double *const radius_list,
     416              :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
     417              :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
     418              :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
     419              :     const double dh_inv[nlevels][3][3], void *ptr) {
     420              : 
     421           12 :   assert(ptr != NULL);
     422           12 :   grid_context *ctx = (grid_context *)ptr;
     423           12 :   assert(ctx->checksum == ctx_checksum);
     424              : 
     425           12 :   ctx->orthorhombic = orthorhombic;
     426           12 :   update_block_offsets(nblocks, block_offsets, ctx);
     427           12 :   update_atoms_position(natoms, atom_positions, ctx);
     428           12 :   update_atoms_kinds(natoms, atom_kinds, ctx);
     429           12 :   update_basis_set(nkinds, basis_sets, ctx);
     430           12 :   update_task_lists(nlevels, ntasks, level_list, iatom_list, jatom_list,
     431              :                     iset_list, jset_list, ipgf_list, jpgf_list,
     432              :                     border_mask_list, block_num_list, radius_list, rab_list,
     433              :                     ctx);
     434           12 :   update_layouts(nlevels, npts_global, npts_local, shift_local, border_width,
     435              :                  dh, dh_inv, ctx);
     436           12 :   update_grid(nlevels, ctx);
     437              : 
     438              :   // Find largest Cartesian subblock size.
     439           12 :   ctx->maxco = 0;
     440           36 :   for (int i = 0; i < nkinds; i++) {
     441           24 :     ctx->maxco = imax(ctx->maxco, ctx->basis_sets[i]->maxco);
     442              :   }
     443           12 : }
     444              : 
     445            0 : void initialize_grid_context_on_gpu(void *ptr, const int number_of_devices,
     446              :                                     const int *device_id) {
     447            0 :   assert(ptr != NULL);
     448            0 :   grid_context *ctx = (grid_context *)ptr;
     449            0 :   assert(ctx->checksum == ctx_checksum);
     450            0 :   ctx->work_on_gpu = false;
     451            0 :   if (number_of_devices <= 0) {
     452              :     return;
     453              :   }
     454              : 
     455            0 :   ctx->number_of_devices = number_of_devices;
     456            0 :   ctx->queue_length = 8192;
     457            0 :   if (ctx->device_id == NULL) {
     458            0 :     ctx->device_id = malloc(sizeof(int) * number_of_devices);
     459              :   } else {
     460            0 :     ctx->device_id = realloc(ctx->device_id, sizeof(int) * number_of_devices);
     461              :   }
     462            0 :   assert(ctx->device_id != NULL);
     463              : 
     464            0 :   memcpy(ctx->device_id, device_id, sizeof(int) * number_of_devices);
     465              : }
     466              : 
     467            8 : void destroy_grid_context_dgemm(void *ptr) {
     468            8 :   assert(ptr);
     469            8 :   grid_context *ctx = (grid_context *)ptr;
     470            8 :   assert(ctx->checksum == ctx_checksum);
     471            8 :   free(ctx->block_offsets);
     472            8 :   free(ctx->atom_positions);
     473            8 :   free(ctx->atom_kinds);
     474            8 :   free(ctx->basis_sets);
     475            8 :   free(ctx->tasks[0]);
     476            8 :   free(ctx->tasks);
     477            8 :   free(ctx->tasks_per_level);
     478            8 :   free(ctx->layouts);
     479            8 :   free(ctx->grid);
     480            8 :   if (ctx->device_id)
     481            0 :     free(ctx->device_id);
     482              : 
     483            8 :   if (ctx->handler) {
     484           16 :     for (int i = 0; i < ctx->number_of_handler; i++) {
     485            8 :       collocate_destroy_handle(ctx->handler[i]);
     486              :     }
     487            8 :     free(ctx->handler);
     488              :   }
     489              : 
     490            8 :   free(ctx);
     491            8 : }
     492              : 
     493            0 : void apply_cutoff(void *ptr) {
     494            0 :   assert(ptr);
     495            0 :   grid_context *ctx = (grid_context *)ptr;
     496            0 :   assert(ctx->checksum == ctx_checksum);
     497            0 :   ctx->apply_cutoff = true;
     498            0 : }
     499              : 
     500         1280 : void set_grid_parameters(
     501              :     tensor *grid, const bool orthorhombic,
     502              :     const int grid_full_size[3],  /* size of the full grid */
     503              :     const int grid_local_size[3], /* size of the local grid block */
     504              :     const int shift_local[3],     /* coordinates of the lower coordinates of the
     505              :                                      local grid window */
     506              :     const int border_width[3],    /* width of the borders */
     507              :     const double
     508              :         dh[3][3], /* displacement vectors of the grid (cartesian) -> (ijk) */
     509              :     const double dh_inv[3][3], /* (ijk) -> (x,y,z) */
     510              :     offload_buffer *grid_) {
     511         1280 :   memset(grid, 0, sizeof(tensor));
     512         1280 :   initialize_tensor_3(grid, grid_local_size[2], grid_local_size[1],
     513              :                       grid_local_size[0]);
     514              : 
     515         1280 :   grid->data = grid_->host_buffer;
     516         1280 :   grid->ld_ = grid_local_size[0];
     517              : 
     518         1280 :   setup_global_grid_size(grid, &grid_full_size[0]);
     519              : 
     520              :   /* the grid is divided over several ranks or not periodic */
     521         1280 :   if ((grid_local_size[0] != grid_full_size[0]) ||
     522         1280 :       (grid_local_size[1] != grid_full_size[1]) ||
     523         1280 :       (grid_local_size[2] != grid_full_size[2])) {
     524            0 :     setup_grid_window(grid, shift_local, border_width, 0);
     525              :   } else {
     526         1280 :     grid->window_shift[0] = 0;
     527         1280 :     grid->window_shift[1] = 0;
     528         1280 :     grid->window_shift[2] = 0;
     529              : 
     530         1280 :     grid->window_size[0] = grid->size[0];
     531         1280 :     grid->window_size[1] = grid->size[1];
     532         1280 :     grid->window_size[2] = grid->size[2];
     533              :   }
     534              : 
     535         1280 :   grid->dh[0][0] = dh[0][0];
     536         1280 :   grid->dh[0][1] = dh[0][1];
     537         1280 :   grid->dh[0][2] = dh[0][2];
     538         1280 :   grid->dh[1][0] = dh[1][0];
     539         1280 :   grid->dh[1][1] = dh[1][1];
     540         1280 :   grid->dh[1][2] = dh[1][2];
     541         1280 :   grid->dh[2][0] = dh[2][0];
     542         1280 :   grid->dh[2][1] = dh[2][1];
     543         1280 :   grid->dh[2][2] = dh[2][2];
     544              : 
     545         1280 :   grid->dh_inv[0][0] = dh_inv[0][0];
     546         1280 :   grid->dh_inv[0][1] = dh_inv[0][1];
     547         1280 :   grid->dh_inv[0][2] = dh_inv[0][2];
     548         1280 :   grid->dh_inv[1][0] = dh_inv[1][0];
     549         1280 :   grid->dh_inv[1][1] = dh_inv[1][1];
     550         1280 :   grid->dh_inv[1][2] = dh_inv[1][2];
     551         1280 :   grid->dh_inv[2][0] = dh_inv[2][0];
     552         1280 :   grid->dh_inv[2][1] = dh_inv[2][1];
     553         1280 :   grid->dh_inv[2][2] = dh_inv[2][2];
     554              : 
     555         1280 :   verify_orthogonality(dh, grid->orthogonal);
     556              : 
     557         1280 :   if (orthorhombic) {
     558          672 :     grid->orthogonal[0] = true;
     559          672 :     grid->orthogonal[1] = true;
     560          672 :     grid->orthogonal[2] = true;
     561              :   }
     562         1280 : }
     563              : 
     564              : /*******************************************************************************
     565              :  * \brief Allocates a task list for the dgemm backend.
     566              :  *        See grid_task_list.h for details.
     567              :  ******************************************************************************/
     568           20 : void grid_dgemm_create_task_list(
     569              :     const bool orthorhombic, const int ntasks, const int nlevels,
     570              :     const int natoms, const int nkinds, const int nblocks,
     571              :     const int block_offsets[nblocks], const double atom_positions[natoms][3],
     572              :     const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
     573              :     const int level_list[ntasks], const int iatom_list[ntasks],
     574              :     const int jatom_list[ntasks], const int iset_list[ntasks],
     575              :     const int jset_list[ntasks], const int ipgf_list[ntasks],
     576              :     const int jpgf_list[ntasks], const int border_mask_list[ntasks],
     577              :     const int block_num_list[ntasks], const double radius_list[ntasks],
     578              :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
     579              :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
     580              :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
     581              :     const double dh_inv[nlevels][3][3], grid_dgemm_task_list **task_list) {
     582              : 
     583           20 :   if (*task_list == NULL) {
     584            8 :     *task_list = create_grid_context_dgemm(
     585              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     586              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     587              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     588              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     589              :         npts_local, shift_local, border_width, dh, dh_inv);
     590              :   } else {
     591           12 :     update_grid_context_dgemm(
     592              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     593              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     594              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     595              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     596              :         npts_local, shift_local, border_width, dh, dh_inv, *task_list);
     597              :   }
     598              : 
     599           20 :   const grid_library_config config = grid_library_get_config();
     600           20 :   if (config.apply_cutoff) {
     601            0 :     apply_cutoff(*task_list);
     602              :   }
     603           20 : }
     604              : 
     605              : /*******************************************************************************
     606              :  * \brief Deallocates given task list, basis_sets have to be freed separately.
     607              :  ******************************************************************************/
     608            8 : void grid_dgemm_free_task_list(grid_dgemm_task_list *task_list) {
     609            8 :   destroy_grid_context_dgemm(task_list);
     610            8 : }
        

Generated by: LCOV version 2.0-1