LCOV - code coverage report
Current view: top level - src/grid - grid_task_list.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:85b8a9b) Lines: 77.4 % 208 161
Test Date: 2026-06-14 06:48:14 Functions: 100.0 % 4 4

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : 
       8              : #include <assert.h>
       9              : #include <math.h>
      10              : #include <stddef.h>
      11              : #include <stdio.h>
      12              : #include <stdlib.h>
      13              : #include <string.h>
      14              : 
      15              : #include "grid_task_list.h"
      16              : #include "grid_task_list_internal.h"
      17              : 
      18              : /*******************************************************************************
      19              :  * \brief Allocates a task list which can be passed to grid_collocate_task_list.
      20              :  *        See grid_task_list.h for details.
      21              :  * \author Ole Schuett
      22              :  ******************************************************************************/
      23        16158 : void grid_create_task_list(
      24              :     const bool orthorhombic, const int ntasks, const int nlevels,
      25              :     const int natoms, const int nkinds, const int nblocks,
      26              :     const int block_offsets[nblocks], const double atom_positions[natoms][3],
      27              :     const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
      28              :     const int level_list[ntasks], const int iatom_list[ntasks],
      29              :     const int jatom_list[ntasks], const int iset_list[ntasks],
      30              :     const int jset_list[ntasks], const int ipgf_list[ntasks],
      31              :     const int jpgf_list[ntasks], const int border_mask_list[ntasks],
      32              :     const int block_num_list[ntasks], const double radius_list[ntasks],
      33              :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
      34              :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
      35              :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
      36              :     const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {
      37              : 
      38        16158 :   const grid_library_config config = grid_library_get_config();
      39              : 
      40        16158 :   grid_task_list_internal *task_list = NULL;
      41              : 
      42        16158 :   if (*task_list_out == NULL) {
      43         9802 :     task_list = malloc(sizeof(grid_task_list_internal));
      44              :     // not a proper handling of errors. assert is a debug tool
      45         9802 :     assert(task_list != NULL);
      46         9802 :     memset(task_list, 0, sizeof(grid_task_list_internal));
      47         9802 :     *task_list_out = task_list;
      48              : 
      49              :     // Resolve AUTO to a concrete backend.
      50         9802 :     if (config.backend == GRID_BACKEND_AUTO) {
      51              : #if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) &&                     \
      52              :     !defined(__NO_OFFLOAD_GRID)
      53              :       task_list->backend = GRID_BACKEND_GPU;
      54              : #else
      55         9782 :       task_list->backend = GRID_BACKEND_CPU;
      56              : #endif
      57              :     } else {
      58           20 :       task_list->backend = config.backend;
      59              :     }
      60              :   } else {
      61              :     // Reuse existing task list.
      62              :     task_list = (grid_task_list_internal *)*task_list_out;
      63              :   }
      64              : 
      65        16158 :   if ((nblocks == 0) || (ntasks == 0) || (nlevels == 0)) {
      66          245 :     task_list->empty = true;
      67          245 :     return;
      68              :   } else {
      69        15913 :     task_list->empty = false;
      70              :   }
      71              : 
      72        15913 :   size_t size = (size_t)nlevels * 3 * sizeof(int);
      73              : 
      74        15913 :   if (task_list->nlevels < nlevels) {
      75         9601 :     free(task_list->npts_local);
      76         9601 :     task_list->npts_local = malloc(size);
      77              :   }
      78              : 
      79              :   // Store npts_local for bounds checking and validation.
      80        15913 :   task_list->nlevels = nlevels;
      81        15913 :   assert(task_list->npts_local != NULL);
      82        15913 :   memcpy(task_list->npts_local, npts_local, size);
      83              : 
      84              :   // Always create reference backend because it might be needed for validation.
      85        15913 :   grid_ref_create_task_list(
      86              :       orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      87              :       atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      88              :       jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
      89              :       block_num_list, radius_list, rab_list, npts_global, npts_local,
      90              :       shift_local, border_width, dh, dh_inv, &task_list->ref);
      91              : 
      92              :   // Create other backend, if selected.
      93        15913 :   switch (task_list->backend) {
      94              :   case GRID_BACKEND_REF:
      95              :     break; // was already created above
      96        15891 :   case GRID_BACKEND_CPU:
      97        15891 :     grid_cpu_create_task_list(
      98              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      99              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     100              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     101              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     102              :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
     103        15891 :     break;
     104           19 :   case GRID_BACKEND_DGEMM:
     105           19 :     grid_dgemm_create_task_list(
     106              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     107              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     108              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     109              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     110              :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
     111           19 :     break;
     112              : 
     113            0 :   case GRID_BACKEND_GPU:
     114              : #if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) &&                     \
     115              :     !defined(__NO_OFFLOAD_GRID)
     116              :     grid_gpu_create_task_list(
     117              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     118              :         &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
     119              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     120              :         border_mask_list, block_num_list, radius_list, &rab_list[0][0],
     121              :         &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
     122              :         &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->gpu);
     123              : #else
     124            0 :     fprintf(stderr, "Error: The GPU grid backend is not available. "
     125              :                     "Please re-compile with -D__OFFLOAD");
     126            0 :     abort();
     127              : #endif
     128            0 :     break;
     129              : 
     130            0 :   default:
     131            0 :     printf("Error: Unknown grid backend: %i.\n", config.backend);
     132            0 :     abort();
     133        15913 :     break;
     134              :   }
     135              : }
     136              : 
     137              : /*******************************************************************************
     138              :  * \brief Deallocates given task list, basis_sets have to be freed separately.
     139              :  * \author Ole Schuett
     140              :  ******************************************************************************/
     141         9802 : void grid_free_task_list(grid_task_list *ptr) {
     142         9802 :   if (ptr == NULL)
     143              :     return;
     144              : 
     145         9802 :   grid_task_list_internal *task_list = (grid_task_list_internal *)ptr;
     146              : 
     147         9802 :   if (task_list->ref != NULL) {
     148         9601 :     grid_ref_free_task_list(task_list->ref);
     149         9601 :     task_list->ref = NULL;
     150              :   }
     151         9802 :   if (task_list->cpu != NULL) {
     152         9591 :     grid_cpu_free_task_list(task_list->cpu);
     153         9591 :     task_list->cpu = NULL;
     154              :   }
     155         9802 :   if (task_list->dgemm != NULL) {
     156            7 :     grid_dgemm_free_task_list(task_list->dgemm);
     157            7 :     task_list->dgemm = NULL;
     158              :   }
     159              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     160              :   if (task_list->gpu != NULL) {
     161              :     grid_gpu_free_task_list(task_list->gpu);
     162              :     task_list->gpu = NULL;
     163              :   }
     164              : #endif
     165              : 
     166         9802 :   free(task_list->npts_local);
     167         9802 :   free(task_list);
     168              : }
     169              : 
     170              : /*******************************************************************************
     171              :  * \brief Collocate all tasks of in given list onto given grids.
     172              :  *        See grid_task_list.h for details.
     173              :  * \author Ole Schuett
     174              :  ******************************************************************************/
     175       234996 : void grid_collocate_task_list(const grid_task_list *ptr,
     176              :                               const enum grid_func func, const int nlevels,
     177              :                               const int npts_local[nlevels][3],
     178              :                               const offload_buffer *pab_blocks,
     179              :                               offload_buffer *grids[nlevels]) {
     180       234996 :   if (ptr == NULL)
     181              :     return;
     182              : 
     183       234996 :   grid_task_list_internal *task_list = (grid_task_list_internal *)ptr;
     184              : 
     185       234996 :   if (task_list->empty) {
     186        13277 :     for (int level = 0; level < nlevels; level++)
     187        10626 :       memset(grids[level]->host_buffer, 0, grids[level]->size);
     188              :     return;
     189              :   }
     190              : 
     191              :   // Bounds check.
     192       232345 :   assert(task_list->nlevels == nlevels);
     193      1150393 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     194       918048 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     195       918048 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     196       918048 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     197              :   }
     198              : 
     199       232345 :   switch (task_list->backend) {
     200           21 :   case GRID_BACKEND_REF:
     201           21 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     202              :                                  grids);
     203           21 :     break;
     204       232171 :   case GRID_BACKEND_CPU:
     205       232171 :     grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
     206              :                                  grids);
     207       232171 :     break;
     208          153 :   case GRID_BACKEND_DGEMM:
     209          153 :     grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
     210              :                                    grids);
     211          153 :     break;
     212              : #if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) &&                     \
     213              :     !defined(__NO_OFFLOAD_GRID)
     214              :   case GRID_BACKEND_GPU:
     215              :     grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
     216              :                                  grids);
     217              :     break;
     218              : #endif
     219            0 :   default:
     220            0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     221            0 :     abort();
     222       232345 :     break;
     223              :   }
     224              : 
     225              :   // Perform validation if enabled.
     226       232345 :   if (grid_library_get_config().validate) {
     227              :     // Allocate space for reference results.
     228           21 :     offload_buffer *grids_ref[nlevels];
     229          105 :     for (int level = 0; level < nlevels; level++) {
     230           84 :       const int npts_local_total =
     231           84 :           npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
     232           84 :       grids_ref[level] = NULL;
     233           84 :       offload_create_buffer(npts_local_total, &grids_ref[level]);
     234              :     }
     235              : 
     236              :     // Call reference implementation.
     237           21 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     238              :                                  grids_ref);
     239              : 
     240              :     // Compare results.
     241           21 :     const double tolerance = 1e-12;
     242           21 :     double max_rel_diff = 0.0;
     243          105 :     for (int level = 0; level < nlevels; level++) {
     244         1491 :       for (int i = 0; i < npts_local[level][0]; i++) {
     245        43692 :         for (int j = 0; j < npts_local[level][1]; j++) {
     246      1914354 :           for (int k = 0; k < npts_local[level][2]; k++) {
     247      1872069 :             const int idx = k * npts_local[level][1] * npts_local[level][0] +
     248      1872069 :                             j * npts_local[level][0] + i;
     249      1872069 :             const double ref_value = grids_ref[level]->host_buffer[idx];
     250      1872069 :             const double test_value = grids[level]->host_buffer[idx];
     251      1872069 :             const double diff = fabs(test_value - ref_value);
     252      1872069 :             const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     253      1872069 :             max_rel_diff = fmax(max_rel_diff, rel_diff);
     254      1872069 :             if (rel_diff > tolerance) {
     255            0 :               fprintf(stderr, "Error: Validation failure in grid collocate\n");
     256            0 :               fprintf(stderr, "   diff:     %le\n", diff);
     257            0 :               fprintf(stderr, "   rel_diff: %le\n", rel_diff);
     258            0 :               fprintf(stderr, "   value:    %le\n", ref_value);
     259            0 :               fprintf(stderr, "   level:    %i\n", level);
     260            0 :               fprintf(stderr, "   ijk:      %i  %i  %i\n", i, j, k);
     261            0 :               abort();
     262              :             }
     263              :           }
     264              :         }
     265              :       }
     266           84 :       offload_free_buffer(grids_ref[level]);
     267           84 :       printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
     268              :     }
     269              :   }
     270              : }
     271              : 
     272              : /*******************************************************************************
     273              :  * \brief Integrate all tasks of in given list from given grids.
     274              :  *        See grid_task_list.h for details.
     275              :  * \author Ole Schuett
     276              :  ******************************************************************************/
     277       214244 : void grid_integrate_task_list(const grid_task_list *ptr, const bool compute_tau,
     278              :                               const int natoms, const int nlevels,
     279              :                               const int npts_local[nlevels][3],
     280              :                               const offload_buffer *pab_blocks,
     281              :                               const offload_buffer *grids[nlevels],
     282              :                               offload_buffer *hab_blocks,
     283              :                               double forces[natoms][3], double virial[3][3]) {
     284              : 
     285       214244 :   if (ptr == NULL)
     286              :     return;
     287              : 
     288       214244 :   grid_task_list_internal *task_list = (grid_task_list_internal *)ptr;
     289              : 
     290       214244 :   if (task_list->empty) {
     291         2077 :     memset(hab_blocks->host_buffer, 0, hab_blocks->size);
     292         2077 :     if (virial) {
     293           24 :       virial[0][0] = 0.0;
     294           24 :       virial[0][1] = 0.0;
     295           24 :       virial[0][2] = 0.0;
     296           24 :       virial[1][0] = 0.0;
     297           24 :       virial[1][1] = 0.0;
     298           24 :       virial[1][2] = 0.0;
     299           24 :       virial[2][0] = 0.0;
     300           24 :       virial[2][1] = 0.0;
     301           24 :       virial[2][2] = 0.0;
     302              :     }
     303         2077 :     if (forces) {
     304          312 :       for (int atom = 0; atom < natoms; atom++) {
     305          198 :         forces[atom][0] = 0.0;
     306          198 :         forces[atom][1] = 0.0;
     307          198 :         forces[atom][2] = 0.0;
     308              :       }
     309              :     }
     310         2077 :     return;
     311              :   }
     312              : 
     313              :   // Bounds check.
     314       212167 :   assert(task_list->nlevels == nlevels);
     315      1051603 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     316       839436 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     317       839436 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     318       839436 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     319              :   }
     320              : 
     321       212167 :   assert(forces == NULL || pab_blocks != NULL);
     322       212167 :   assert(virial == NULL || pab_blocks != NULL);
     323              : 
     324       212167 :   switch (task_list->backend) {
     325              : #if (defined(__OFFLOAD_CUDA) || defined(__OFFLOAD_HIP)) &&                     \
     326              :     !defined(__NO_OFFLOAD_GRID)
     327              :   case GRID_BACKEND_GPU:
     328              :     grid_gpu_integrate_task_list(task_list->gpu, compute_tau, nlevels,
     329              :                                  pab_blocks, grids, hab_blocks, &forces[0][0],
     330              :                                  &virial[0][0]);
     331              :     break;
     332              : #endif
     333          149 :   case GRID_BACKEND_DGEMM:
     334          149 :     grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
     335              :                                    nlevels, pab_blocks, grids, hab_blocks,
     336              :                                    forces, virial);
     337          149 :     break;
     338       212001 :   case GRID_BACKEND_CPU:
     339       212001 :     grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
     340              :                                  pab_blocks, grids, hab_blocks, forces, virial);
     341       212001 :     break;
     342           17 :   case GRID_BACKEND_REF:
     343           17 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     344              :                                  pab_blocks, grids, hab_blocks, forces, virial);
     345           17 :     break;
     346            0 :   default:
     347            0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     348            0 :     abort();
     349       212167 :     break;
     350              :   }
     351              : 
     352              :   // Perform validation if enabled.
     353       212167 :   if (grid_library_get_config().validate) {
     354              :     // Allocate space for reference results.
     355           17 :     const int hab_length = hab_blocks->size / sizeof(double);
     356           17 :     offload_buffer *hab_blocks_ref = NULL;
     357           17 :     offload_create_buffer(hab_length, &hab_blocks_ref);
     358           17 :     double forces_ref[natoms][3], virial_ref[3][3];
     359              : 
     360              :     // Call reference implementation.
     361           34 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     362              :                                  pab_blocks, grids, hab_blocks_ref,
     363              :                                  (forces != NULL) ? forces_ref : NULL,
     364              :                                  (virial != NULL) ? virial_ref : NULL);
     365              : 
     366              :     // Compare hab.
     367           17 :     const double hab_tolerance = 1e-12;
     368           17 :     double hab_max_rel_diff = 0.0;
     369         2178 :     for (int i = 0; i < hab_length; i++) {
     370         2161 :       const double ref_value = hab_blocks_ref->host_buffer[i];
     371         2161 :       const double test_value = hab_blocks->host_buffer[i];
     372         2161 :       const double diff = fabs(test_value - ref_value);
     373         2161 :       const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     374         2161 :       hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
     375         2161 :       if (rel_diff > hab_tolerance) {
     376            0 :         fprintf(stderr, "Error: Validation failure in grid integrate\n");
     377            0 :         fprintf(stderr, "   hab diff:     %le\n", diff);
     378            0 :         fprintf(stderr, "   hab rel_diff: %le\n", rel_diff);
     379            0 :         fprintf(stderr, "   hab value:    %le\n", ref_value);
     380            0 :         fprintf(stderr, "   hab i:        %i\n", i);
     381            0 :         abort();
     382              :       }
     383              :     }
     384              : 
     385              :     // Compare forces.
     386           17 :     const double forces_tolerance = 1e-8; // account for higher numeric noise
     387           17 :     double forces_max_rel_diff = 0.0;
     388           17 :     if (forces != NULL) {
     389           12 :       for (int iatom = 0; iatom < natoms; iatom++) {
     390           36 :         for (int idir = 0; idir < 3; idir++) {
     391           27 :           const double ref_value = forces_ref[iatom][idir];
     392           27 :           const double test_value = forces[iatom][idir];
     393           27 :           const double diff = fabs(test_value - ref_value);
     394           27 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     395           27 :           forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
     396           27 :           if (rel_diff > forces_tolerance) {
     397            0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     398            0 :             fprintf(stderr, "   forces diff:     %le\n", diff);
     399            0 :             fprintf(stderr, "   forces rel_diff: %le\n", rel_diff);
     400            0 :             fprintf(stderr, "   forces value:    %le\n", ref_value);
     401            0 :             fprintf(stderr, "   forces atom:     %i\n", iatom);
     402            0 :             fprintf(stderr, "   forces dir:      %i\n", idir);
     403            0 :             abort();
     404              :           }
     405              :         }
     406              :       }
     407              :     }
     408              : 
     409              :     // Compare virial.
     410           17 :     const double virial_tolerance = 1e-8; // account for higher numeric noise
     411           17 :     double virial_max_rel_diff = 0.0;
     412           17 :     if (virial != NULL) {
     413            0 :       for (int i = 0; i < 3; i++) {
     414            0 :         for (int j = 0; j < 3; j++) {
     415            0 :           const double ref_value = virial_ref[i][j];
     416            0 :           const double test_value = virial[i][j];
     417            0 :           const double diff = fabs(test_value - ref_value);
     418            0 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     419            0 :           virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
     420            0 :           if (rel_diff > virial_tolerance) {
     421            0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     422            0 :             fprintf(stderr, "   virial diff:     %le\n", diff);
     423            0 :             fprintf(stderr, "   virial rel_diff: %le\n", rel_diff);
     424            0 :             fprintf(stderr, "   virial value:    %le\n", ref_value);
     425            0 :             fprintf(stderr, "   virial ij:       %i  %i\n", i, j);
     426            0 :             abort();
     427              :           }
     428              :         }
     429              :       }
     430              :     }
     431              : 
     432           17 :     printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
     433              :            hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
     434           17 :     offload_free_buffer(hab_blocks_ref);
     435              :   }
     436              : }
     437              : 
     438              : // EOF
        

Generated by: LCOV version 2.0-1