LCOV - code coverage report
Current view: top level - src/grid - grid_task_list.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:7641cd9) Lines: 74.0 % 181 134
Test Date: 2026-05-25 07:16:39 Functions: 100.0 % 4 4

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2026 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : 
       8              : #include <assert.h>
       9              : #include <math.h>
      10              : #include <stddef.h>
      11              : #include <stdio.h>
      12              : #include <stdlib.h>
      13              : #include <string.h>
      14              : 
      15              : #include "common/grid_common.h"
      16              : #include "common/grid_constants.h"
      17              : #include "common/grid_library.h"
      18              : #include "grid_task_list.h"
      19              : 
      20              : /*******************************************************************************
      21              :  * \brief Allocates a task list which can be passed to grid_collocate_task_list.
      22              :  *        See grid_task_list.h for details.
      23              :  * \author Ole Schuett
      24              :  ******************************************************************************/
      25        15802 : void grid_create_task_list(
      26              :     const bool orthorhombic, const int ntasks, const int nlevels,
      27              :     const int natoms, const int nkinds, const int nblocks,
      28              :     const int block_offsets[nblocks], const double atom_positions[natoms][3],
      29              :     const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
      30              :     const int level_list[ntasks], const int iatom_list[ntasks],
      31              :     const int jatom_list[ntasks], const int iset_list[ntasks],
      32              :     const int jset_list[ntasks], const int ipgf_list[ntasks],
      33              :     const int jpgf_list[ntasks], const int border_mask_list[ntasks],
      34              :     const int block_num_list[ntasks], const double radius_list[ntasks],
      35              :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
      36              :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
      37              :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
      38              :     const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {
      39              : 
      40        15802 :   const grid_library_config config = grid_library_get_config();
      41              : 
      42        15802 :   grid_task_list *task_list = NULL;
      43              : 
      44        15802 :   if (*task_list_out == NULL) {
      45         9584 :     task_list = malloc(sizeof(grid_task_list));
      46         9584 :     assert(task_list != NULL);
      47         9584 :     memset(task_list, 0, sizeof(grid_task_list));
      48              : 
      49              :     // Resolve AUTO to a concrete backend.
      50         9584 :     if (config.backend == GRID_BACKEND_AUTO) {
      51              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
      52              :       task_list->backend = GRID_BACKEND_GPU;
      53              : #else
      54         9564 :       task_list->backend = GRID_BACKEND_CPU;
      55              : #endif
      56              :     } else {
      57           20 :       task_list->backend = config.backend;
      58              :     }
      59              :   } else {
      60              :     // Reuse existing task list.
      61         6218 :     task_list = *task_list_out;
      62         6218 :     free(task_list->npts_local);
      63              :   }
      64              : 
      65              :   // Store npts_local for bounds checking and validation.
      66        15802 :   task_list->nlevels = nlevels;
      67        15802 :   size_t size = nlevels * 3 * sizeof(int);
      68        15802 :   task_list->npts_local = malloc(size);
      69        15802 :   assert(task_list->npts_local != NULL);
      70        15802 :   memcpy(task_list->npts_local, npts_local, size);
      71              : 
      72              :   // Always create reference backend because it might be needed for validation.
      73        15802 :   grid_ref_create_task_list(
      74              :       orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      75              :       atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      76              :       jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
      77              :       block_num_list, radius_list, rab_list, npts_global, npts_local,
      78              :       shift_local, border_width, dh, dh_inv, &task_list->ref);
      79              : 
      80              :   // Create other backend, if selected.
      81        15802 :   switch (task_list->backend) {
      82              :   case GRID_BACKEND_REF:
      83              :     break; // was already created above
      84        15778 :   case GRID_BACKEND_CPU:
      85        15778 :     grid_cpu_create_task_list(
      86              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      87              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      88              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
      89              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
      90              :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
      91        15778 :     break;
      92           20 :   case GRID_BACKEND_DGEMM:
      93           20 :     grid_dgemm_create_task_list(
      94              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      95              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      96              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
      97              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
      98              :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
      99           20 :     break;
     100              : 
     101            0 :   case GRID_BACKEND_GPU:
     102              : #if (defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID))
     103              :     grid_gpu_create_task_list(
     104              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     105              :         &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
     106              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     107              :         border_mask_list, block_num_list, radius_list, &rab_list[0][0],
     108              :         &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
     109              :         &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->gpu);
     110              : #else
     111            0 :     fprintf(stderr, "Error: The GPU grid backend is not available. "
     112              :                     "Please re-compile with -D__OFFLOAD");
     113            0 :     abort();
     114              : #endif
     115            0 :     break;
     116              : 
     117            0 :   default:
     118            0 :     printf("Error: Unknown grid backend: %i.\n", config.backend);
     119            0 :     abort();
     120        15802 :     break;
     121              :   }
     122              : 
     123        15802 :   *task_list_out = task_list;
     124        15802 : }
     125              : 
     126              : /*******************************************************************************
     127              :  * \brief Deallocates given task list, basis_sets have to be freed separately.
     128              :  * \author Ole Schuett
     129              :  ******************************************************************************/
     130         9584 : void grid_free_task_list(grid_task_list *task_list) {
     131              : 
     132         9584 :   if (task_list->ref != NULL) {
     133         9584 :     grid_ref_free_task_list(task_list->ref);
     134         9584 :     task_list->ref = NULL;
     135              :   }
     136         9584 :   if (task_list->cpu != NULL) {
     137         9572 :     grid_cpu_free_task_list(task_list->cpu);
     138         9572 :     task_list->cpu = NULL;
     139              :   }
     140         9584 :   if (task_list->dgemm != NULL) {
     141            8 :     grid_dgemm_free_task_list(task_list->dgemm);
     142            8 :     task_list->dgemm = NULL;
     143              :   }
     144              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     145              :   if (task_list->gpu != NULL) {
     146              :     grid_gpu_free_task_list(task_list->gpu);
     147              :     task_list->gpu = NULL;
     148              :   }
     149              : #endif
     150              : 
     151         9584 :   free(task_list->npts_local);
     152         9584 :   free(task_list);
     153         9584 : }
     154              : 
     155              : /*******************************************************************************
     156              :  * \brief Collocate all tasks of in given list onto given grids.
     157              :  *        See grid_task_list.h for details.
     158              :  * \author Ole Schuett
     159              :  ******************************************************************************/
     160       230860 : void grid_collocate_task_list(const grid_task_list *task_list,
     161              :                               const enum grid_func func, const int nlevels,
     162              :                               const int npts_local[nlevels][3],
     163              :                               const offload_buffer *pab_blocks,
     164              :                               offload_buffer *grids[nlevels]) {
     165              : 
     166              :   // Bounds check.
     167       230860 :   assert(task_list->nlevels == nlevels);
     168      1145540 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     169       914680 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     170       914680 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     171       914680 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     172              :   }
     173              : 
     174       230860 :   switch (task_list->backend) {
     175           30 :   case GRID_BACKEND_REF:
     176           30 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     177              :                                  grids);
     178           30 :     break;
     179       230668 :   case GRID_BACKEND_CPU:
     180       230668 :     grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
     181              :                                  grids);
     182       230668 :     break;
     183          162 :   case GRID_BACKEND_DGEMM:
     184          162 :     grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
     185              :                                    grids);
     186          162 :     break;
     187              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     188              :   case GRID_BACKEND_GPU:
     189              :     grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
     190              :                                  grids);
     191              :     break;
     192              : #endif
     193            0 :   default:
     194            0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     195            0 :     abort();
     196       230860 :     break;
     197              :   }
     198              : 
     199              :   // Perform validation if enabled.
     200       230860 :   if (grid_library_get_config().validate) {
     201              :     // Allocate space for reference results.
     202           30 :     offload_buffer *grids_ref[nlevels];
     203          150 :     for (int level = 0; level < nlevels; level++) {
     204          120 :       const int npts_local_total =
     205          120 :           npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
     206          120 :       grids_ref[level] = NULL;
     207          120 :       offload_create_buffer(npts_local_total, &grids_ref[level]);
     208              :     }
     209              : 
     210              :     // Call reference implementation.
     211           30 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     212              :                                  grids_ref);
     213              : 
     214              :     // Compare results.
     215           30 :     const double tolerance = 1e-12;
     216           30 :     double max_rel_diff = 0.0;
     217          150 :     for (int level = 0; level < nlevels; level++) {
     218         2526 :       for (int i = 0; i < npts_local[level][0]; i++) {
     219        82680 :         for (int j = 0; j < npts_local[level][1]; j++) {
     220      3671568 :           for (int k = 0; k < npts_local[level][2]; k++) {
     221      3591294 :             const int idx = k * npts_local[level][1] * npts_local[level][0] +
     222      3591294 :                             j * npts_local[level][0] + i;
     223      3591294 :             const double ref_value = grids_ref[level]->host_buffer[idx];
     224      3591294 :             const double test_value = grids[level]->host_buffer[idx];
     225      3591294 :             const double diff = fabs(test_value - ref_value);
     226      3591294 :             const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     227      3591294 :             max_rel_diff = fmax(max_rel_diff, rel_diff);
     228      3591294 :             if (rel_diff > tolerance) {
     229            0 :               fprintf(stderr, "Error: Validation failure in grid collocate\n");
     230            0 :               fprintf(stderr, "   diff:     %le\n", diff);
     231            0 :               fprintf(stderr, "   rel_diff: %le\n", rel_diff);
     232            0 :               fprintf(stderr, "   value:    %le\n", ref_value);
     233            0 :               fprintf(stderr, "   level:    %i\n", level);
     234            0 :               fprintf(stderr, "   ijk:      %i  %i  %i\n", i, j, k);
     235            0 :               abort();
     236              :             }
     237              :           }
     238              :         }
     239              :       }
     240          120 :       offload_free_buffer(grids_ref[level]);
     241          120 :       printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
     242              :     }
     243              :   }
     244       230860 : }
     245              : 
     246              : /*******************************************************************************
     247              :  * \brief Integrate all tasks of in given list from given grids.
     248              :  *        See grid_task_list.h for details.
     249              :  * \author Ole Schuett
     250              :  ******************************************************************************/
     251       210640 : void grid_integrate_task_list(
     252              :     const grid_task_list *task_list, const bool compute_tau, const int natoms,
     253              :     const int nlevels, const int npts_local[nlevels][3],
     254              :     const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels],
     255              :     offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3]) {
     256              : 
     257              :   // Bounds check.
     258       210640 :   assert(task_list->nlevels == nlevels);
     259      1045316 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     260       834676 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     261       834676 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     262       834676 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     263              :   }
     264              : 
     265       210640 :   assert(forces == NULL || pab_blocks != NULL);
     266       210640 :   assert(virial == NULL || pab_blocks != NULL);
     267              : 
     268       210640 :   switch (task_list->backend) {
     269              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     270              :   case GRID_BACKEND_GPU:
     271              :     grid_gpu_integrate_task_list(task_list->gpu, compute_tau, nlevels,
     272              :                                  pab_blocks, grids, hab_blocks, &forces[0][0],
     273              :                                  &virial[0][0]);
     274              :     break;
     275              : #endif
     276          158 :   case GRID_BACKEND_DGEMM:
     277          158 :     grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
     278              :                                    nlevels, pab_blocks, grids, hab_blocks,
     279              :                                    forces, virial);
     280          158 :     break;
     281       210456 :   case GRID_BACKEND_CPU:
     282       210456 :     grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
     283              :                                  pab_blocks, grids, hab_blocks, forces, virial);
     284       210456 :     break;
     285           26 :   case GRID_BACKEND_REF:
     286           26 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     287              :                                  pab_blocks, grids, hab_blocks, forces, virial);
     288           26 :     break;
     289            0 :   default:
     290            0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     291            0 :     abort();
     292       210640 :     break;
     293              :   }
     294              : 
     295              :   // Perform validation if enabled.
     296       210640 :   if (grid_library_get_config().validate) {
     297              :     // Allocate space for reference results.
     298           26 :     const int hab_length = hab_blocks->size / sizeof(double);
     299           26 :     offload_buffer *hab_blocks_ref = NULL;
     300           26 :     offload_create_buffer(hab_length, &hab_blocks_ref);
     301           26 :     double forces_ref[natoms][3], virial_ref[3][3];
     302              : 
     303              :     // Call reference implementation.
     304           52 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     305              :                                  pab_blocks, grids, hab_blocks_ref,
     306              :                                  (forces != NULL) ? forces_ref : NULL,
     307              :                                  (virial != NULL) ? virial_ref : NULL);
     308              : 
     309              :     // Compare hab.
     310           26 :     const double hab_tolerance = 1e-12;
     311           26 :     double hab_max_rel_diff = 0.0;
     312         2187 :     for (int i = 0; i < hab_length; i++) {
     313         2161 :       const double ref_value = hab_blocks_ref->host_buffer[i];
     314         2161 :       const double test_value = hab_blocks->host_buffer[i];
     315         2161 :       const double diff = fabs(test_value - ref_value);
     316         2161 :       const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     317         2161 :       hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
     318         2161 :       if (rel_diff > hab_tolerance) {
     319            0 :         fprintf(stderr, "Error: Validation failure in grid integrate\n");
     320            0 :         fprintf(stderr, "   hab diff:     %le\n", diff);
     321            0 :         fprintf(stderr, "   hab rel_diff: %le\n", rel_diff);
     322            0 :         fprintf(stderr, "   hab value:    %le\n", ref_value);
     323            0 :         fprintf(stderr, "   hab i:        %i\n", i);
     324            0 :         abort();
     325              :       }
     326              :     }
     327              : 
     328              :     // Compare forces.
     329           26 :     const double forces_tolerance = 1e-8; // account for higher numeric noise
     330           26 :     double forces_max_rel_diff = 0.0;
     331           26 :     if (forces != NULL) {
     332           14 :       for (int iatom = 0; iatom < natoms; iatom++) {
     333           40 :         for (int idir = 0; idir < 3; idir++) {
     334           30 :           const double ref_value = forces_ref[iatom][idir];
     335           30 :           const double test_value = forces[iatom][idir];
     336           30 :           const double diff = fabs(test_value - ref_value);
     337           30 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     338           30 :           forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
     339           30 :           if (rel_diff > forces_tolerance) {
     340            0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     341            0 :             fprintf(stderr, "   forces diff:     %le\n", diff);
     342            0 :             fprintf(stderr, "   forces rel_diff: %le\n", rel_diff);
     343            0 :             fprintf(stderr, "   forces value:    %le\n", ref_value);
     344            0 :             fprintf(stderr, "   forces atom:     %i\n", iatom);
     345            0 :             fprintf(stderr, "   forces dir:      %i\n", idir);
     346            0 :             abort();
     347              :           }
     348              :         }
     349              :       }
     350              :     }
     351              : 
     352              :     // Compare virial.
     353           26 :     const double virial_tolerance = 1e-8; // account for higher numeric noise
     354           26 :     double virial_max_rel_diff = 0.0;
     355           26 :     if (virial != NULL) {
     356            0 :       for (int i = 0; i < 3; i++) {
     357            0 :         for (int j = 0; j < 3; j++) {
     358            0 :           const double ref_value = virial_ref[i][j];
     359            0 :           const double test_value = virial[i][j];
     360            0 :           const double diff = fabs(test_value - ref_value);
     361            0 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     362            0 :           virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
     363            0 :           if (rel_diff > virial_tolerance) {
     364            0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     365            0 :             fprintf(stderr, "   virial diff:     %le\n", diff);
     366            0 :             fprintf(stderr, "   virial rel_diff: %le\n", rel_diff);
     367            0 :             fprintf(stderr, "   virial value:    %le\n", ref_value);
     368            0 :             fprintf(stderr, "   virial ij:       %i  %i\n", i, j);
     369            0 :             abort();
     370              :           }
     371              :         }
     372              :       }
     373              :     }
     374              : 
     375           26 :     printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
     376              :            hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
     377           26 :     offload_free_buffer(hab_blocks_ref);
     378              :   }
     379       210640 : }
     380              : 
     381              : // EOF
        

Generated by: LCOV version 2.0-1