LCOV - code coverage report
Current view: top level - src/grid - grid_task_list.c (source / functions) Coverage Total Hit
Test: CP2K Regtests (git:936074a) Lines: 72.4 % 185 134
Test Date: 2025-12-04 06:27:48 Functions: 100.0 % 4 4

            Line data    Source code
       1              : /*----------------------------------------------------------------------------*/
       2              : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3              : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4              : /*                                                                            */
       5              : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6              : /*----------------------------------------------------------------------------*/
       7              : 
       8              : #include <assert.h>
       9              : #include <math.h>
      10              : #include <stddef.h>
      11              : #include <stdio.h>
      12              : #include <stdlib.h>
      13              : #include <string.h>
      14              : 
      15              : #include "common/grid_common.h"
      16              : #include "common/grid_constants.h"
      17              : #include "common/grid_library.h"
      18              : #include "grid_task_list.h"
      19              : 
      20              : /*******************************************************************************
      21              :  * \brief Allocates a task list which can be passed to grid_collocate_task_list.
      22              :  *        See grid_task_list.h for details.
      23              :  * \author Ole Schuett
      24              :  ******************************************************************************/
      25        14162 : void grid_create_task_list(
      26              :     const bool orthorhombic, const int ntasks, const int nlevels,
      27              :     const int natoms, const int nkinds, const int nblocks,
      28              :     const int block_offsets[nblocks], const double atom_positions[natoms][3],
      29              :     const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
      30              :     const int level_list[ntasks], const int iatom_list[ntasks],
      31              :     const int jatom_list[ntasks], const int iset_list[ntasks],
      32              :     const int jset_list[ntasks], const int ipgf_list[ntasks],
      33              :     const int jpgf_list[ntasks], const int border_mask_list[ntasks],
      34              :     const int block_num_list[ntasks], const double radius_list[ntasks],
      35              :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
      36              :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
      37              :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
      38              :     const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {
      39              : 
      40        14162 :   const grid_library_config config = grid_library_get_config();
      41              : 
      42        14162 :   grid_task_list *task_list = NULL;
      43              : 
      44        14162 :   if (*task_list_out == NULL) {
      45         8484 :     task_list = malloc(sizeof(grid_task_list));
      46         8484 :     assert(task_list != NULL);
      47         8484 :     memset(task_list, 0, sizeof(grid_task_list));
      48              : 
      49              :     // Resolve AUTO to a concrete backend.
      50         8484 :     if (config.backend == GRID_BACKEND_AUTO) {
      51              : 
      52              : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
      53              :       task_list->backend = GRID_BACKEND_HIP;
      54              : #elif defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
      55              :       task_list->backend = GRID_BACKEND_GPU;
      56              : #else
      57         8464 :       task_list->backend = GRID_BACKEND_CPU;
      58              : #endif
      59              :     } else {
      60           20 :       task_list->backend = config.backend;
      61              :     }
      62              :   } else {
      63              :     // Reuse existing task list.
      64         5678 :     task_list = *task_list_out;
      65         5678 :     free(task_list->npts_local);
      66              :   }
      67              : 
      68              :   // Store npts_local for bounds checking and validation.
      69        14162 :   task_list->nlevels = nlevels;
      70        14162 :   size_t size = nlevels * 3 * sizeof(int);
      71        14162 :   task_list->npts_local = malloc(size);
      72        14162 :   assert(task_list->npts_local != NULL);
      73        14162 :   memcpy(task_list->npts_local, npts_local, size);
      74              : 
      75              :   // Always create reference backend because it might be needed for validation.
      76        14162 :   grid_ref_create_task_list(
      77              :       orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      78              :       atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      79              :       jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
      80              :       block_num_list, radius_list, rab_list, npts_global, npts_local,
      81              :       shift_local, border_width, dh, dh_inv, &task_list->ref);
      82              : 
      83              :   // Create other backend, if selected.
      84        14162 :   switch (task_list->backend) {
      85              :   case GRID_BACKEND_REF:
      86              :     break; // was already created above
      87        14138 :   case GRID_BACKEND_CPU:
      88        14138 :     grid_cpu_create_task_list(
      89              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      90              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      91              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
      92              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
      93              :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
      94        14138 :     break;
      95           20 :   case GRID_BACKEND_DGEMM:
      96           20 :     grid_dgemm_create_task_list(
      97              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      98              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      99              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     100              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     101              :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
     102           20 :     break;
     103              : 
     104            0 :   case GRID_BACKEND_GPU:
     105              : #if (defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID))
     106              :     grid_gpu_create_task_list(
     107              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     108              :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     109              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     110              :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     111              :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->gpu);
     112              : #else
     113            0 :     fprintf(stderr,
     114              :             "Error: The GPU grid backend is not available. "
     115              :             "Please re-compile with -D__OFFLOAD_CUDA or -D__OFFLOAD_HIP");
     116            0 :     abort();
     117              : #endif
     118            0 :     break;
     119              : 
     120            0 :   case GRID_BACKEND_HIP:
     121              : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     122              :     grid_hip_create_task_list(
     123              :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     124              :         &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
     125              :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     126              :         border_mask_list, block_num_list, radius_list, &rab_list[0][0],
     127              :         &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
     128              :         &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->hip);
     129              : #else
     130            0 :     fprintf(stderr, "Error: The HIP grid backend is not available. "
     131              :                     "Please re-compile with -D__OFFLOAD_HIP");
     132            0 :     abort();
     133              : #endif
     134            0 :     break;
     135              : 
     136            0 :   default:
     137            0 :     printf("Error: Unknown grid backend: %i.\n", config.backend);
     138            0 :     abort();
     139        14162 :     break;
     140              :   }
     141              : 
     142        14162 :   *task_list_out = task_list;
     143        14162 : }
     144              : 
     145              : /*******************************************************************************
     146              :  * \brief Deallocates given task list, basis_sets have to be freed separately.
     147              :  * \author Ole Schuett
     148              :  ******************************************************************************/
     149         8484 : void grid_free_task_list(grid_task_list *task_list) {
     150              : 
     151         8484 :   if (task_list->ref != NULL) {
     152         8484 :     grid_ref_free_task_list(task_list->ref);
     153         8484 :     task_list->ref = NULL;
     154              :   }
     155         8484 :   if (task_list->cpu != NULL) {
     156         8472 :     grid_cpu_free_task_list(task_list->cpu);
     157         8472 :     task_list->cpu = NULL;
     158              :   }
     159         8484 :   if (task_list->dgemm != NULL) {
     160            8 :     grid_dgemm_free_task_list(task_list->dgemm);
     161            8 :     task_list->dgemm = NULL;
     162              :   }
     163              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     164              :   if (task_list->gpu != NULL) {
     165              :     grid_gpu_free_task_list(task_list->gpu);
     166              :     task_list->gpu = NULL;
     167              :   }
     168              : #endif
     169              : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     170              :   if (task_list->hip != NULL) {
     171              :     grid_hip_free_task_list(task_list->hip);
     172              :     task_list->hip = NULL;
     173              :   }
     174              : #endif
     175              : 
     176         8484 :   free(task_list->npts_local);
     177         8484 :   free(task_list);
     178         8484 : }
     179              : 
     180              : /*******************************************************************************
     181              :  * \brief Collocate all tasks of in given list onto given grids.
     182              :  *        See grid_task_list.h for details.
     183              :  * \author Ole Schuett
     184              :  ******************************************************************************/
     185       208212 : void grid_collocate_task_list(const grid_task_list *task_list,
     186              :                               const enum grid_func func, const int nlevels,
     187              :                               const int npts_local[nlevels][3],
     188              :                               const offload_buffer *pab_blocks,
     189              :                               offload_buffer *grids[nlevels]) {
     190              : 
     191              :   // Bounds check.
     192       208212 :   assert(task_list->nlevels == nlevels);
     193      1032632 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     194       824420 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     195       824420 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     196       824420 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     197              :   }
     198              : 
     199       208212 :   switch (task_list->backend) {
     200           30 :   case GRID_BACKEND_REF:
     201           30 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     202              :                                  grids);
     203           30 :     break;
     204       208020 :   case GRID_BACKEND_CPU:
     205       208020 :     grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
     206              :                                  grids);
     207       208020 :     break;
     208          162 :   case GRID_BACKEND_DGEMM:
     209          162 :     grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
     210              :                                    grids);
     211          162 :     break;
     212              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     213              :   case GRID_BACKEND_GPU:
     214              :     grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
     215              :                                  grids);
     216              :     break;
     217              : #endif
     218              : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     219              :   case GRID_BACKEND_HIP:
     220              :     grid_hip_collocate_task_list(task_list->hip, func, nlevels, pab_blocks,
     221              :                                  grids);
     222              :     break;
     223              : #endif
     224            0 :   default:
     225            0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     226            0 :     abort();
     227       208212 :     break;
     228              :   }
     229              : 
     230              :   // Perform validation if enabled.
     231       208212 :   if (grid_library_get_config().validate) {
     232              :     // Allocate space for reference results.
     233           30 :     offload_buffer *grids_ref[nlevels];
     234          150 :     for (int level = 0; level < nlevels; level++) {
     235          120 :       const int npts_local_total =
     236          120 :           npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
     237          120 :       grids_ref[level] = NULL;
     238          120 :       offload_create_buffer(npts_local_total, &grids_ref[level]);
     239              :     }
     240              : 
     241              :     // Call reference implementation.
     242           30 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     243              :                                  grids_ref);
     244              : 
     245              :     // Compare results.
     246           30 :     const double tolerance = 1e-12;
     247           30 :     double max_rel_diff = 0.0;
     248          150 :     for (int level = 0; level < nlevels; level++) {
     249         2526 :       for (int i = 0; i < npts_local[level][0]; i++) {
     250        82680 :         for (int j = 0; j < npts_local[level][1]; j++) {
     251      3671568 :           for (int k = 0; k < npts_local[level][2]; k++) {
     252      3591294 :             const int idx = k * npts_local[level][1] * npts_local[level][0] +
     253      3591294 :                             j * npts_local[level][0] + i;
     254      3591294 :             const double ref_value = grids_ref[level]->host_buffer[idx];
     255      3591294 :             const double test_value = grids[level]->host_buffer[idx];
     256      3591294 :             const double diff = fabs(test_value - ref_value);
     257      3591294 :             const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     258      3591294 :             max_rel_diff = fmax(max_rel_diff, rel_diff);
     259      3591294 :             if (rel_diff > tolerance) {
     260            0 :               fprintf(stderr, "Error: Validation failure in grid collocate\n");
     261            0 :               fprintf(stderr, "   diff:     %le\n", diff);
     262            0 :               fprintf(stderr, "   rel_diff: %le\n", rel_diff);
     263            0 :               fprintf(stderr, "   value:    %le\n", ref_value);
     264            0 :               fprintf(stderr, "   level:    %i\n", level);
     265            0 :               fprintf(stderr, "   ijk:      %i  %i  %i\n", i, j, k);
     266            0 :               abort();
     267              :             }
     268              :           }
     269              :         }
     270              :       }
     271          120 :       offload_free_buffer(grids_ref[level]);
     272          120 :       printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
     273              :     }
     274              :   }
     275       208212 : }
     276              : 
     277              : /*******************************************************************************
     278              :  * \brief Integrate all tasks of in given list from given grids.
     279              :  *        See grid_task_list.h for details.
     280              :  * \author Ole Schuett
     281              :  ******************************************************************************/
     282       190844 : void grid_integrate_task_list(
     283              :     const grid_task_list *task_list, const bool compute_tau, const int natoms,
     284              :     const int nlevels, const int npts_local[nlevels][3],
     285              :     const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels],
     286              :     offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3]) {
     287              : 
     288              :   // Bounds check.
     289       190844 :   assert(task_list->nlevels == nlevels);
     290       946058 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     291       755214 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     292       755214 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     293       755214 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     294              :   }
     295              : 
     296       190844 :   assert(forces == NULL || pab_blocks != NULL);
     297       190844 :   assert(virial == NULL || pab_blocks != NULL);
     298              : 
     299       190844 :   switch (task_list->backend) {
     300              : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     301              :   case GRID_BACKEND_GPU:
     302              :     grid_gpu_integrate_task_list(task_list->gpu, compute_tau, natoms, nlevels,
     303              :                                  pab_blocks, grids, hab_blocks, forces, virial);
     304              :     break;
     305              : #endif
     306              : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     307              :   case GRID_BACKEND_HIP:
     308              :     grid_hip_integrate_task_list(task_list->hip, compute_tau, nlevels,
     309              :                                  pab_blocks, grids, hab_blocks, &forces[0][0],
     310              :                                  &virial[0][0]);
     311              :     break;
     312              : #endif
     313          158 :   case GRID_BACKEND_DGEMM:
     314          158 :     grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
     315              :                                    nlevels, pab_blocks, grids, hab_blocks,
     316              :                                    forces, virial);
     317          158 :     break;
     318       190660 :   case GRID_BACKEND_CPU:
     319       190660 :     grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
     320              :                                  pab_blocks, grids, hab_blocks, forces, virial);
     321       190660 :     break;
     322           26 :   case GRID_BACKEND_REF:
     323           26 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     324              :                                  pab_blocks, grids, hab_blocks, forces, virial);
     325           26 :     break;
     326            0 :   default:
     327            0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     328            0 :     abort();
     329       190844 :     break;
     330              :   }
     331              : 
     332              :   // Perform validation if enabled.
     333       190844 :   if (grid_library_get_config().validate) {
     334              :     // Allocate space for reference results.
     335           26 :     const int hab_length = hab_blocks->size / sizeof(double);
     336           26 :     offload_buffer *hab_blocks_ref = NULL;
     337           26 :     offload_create_buffer(hab_length, &hab_blocks_ref);
     338           26 :     double forces_ref[natoms][3], virial_ref[3][3];
     339              : 
     340              :     // Call reference implementation.
     341           52 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     342              :                                  pab_blocks, grids, hab_blocks_ref,
     343              :                                  (forces != NULL) ? forces_ref : NULL,
     344              :                                  (virial != NULL) ? virial_ref : NULL);
     345              : 
     346              :     // Compare hab.
     347           26 :     const double hab_tolerance = 1e-12;
     348           26 :     double hab_max_rel_diff = 0.0;
     349         2187 :     for (int i = 0; i < hab_length; i++) {
     350         2161 :       const double ref_value = hab_blocks_ref->host_buffer[i];
     351         2161 :       const double test_value = hab_blocks->host_buffer[i];
     352         2161 :       const double diff = fabs(test_value - ref_value);
     353         2161 :       const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     354         2161 :       hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
     355         2161 :       if (rel_diff > hab_tolerance) {
     356            0 :         fprintf(stderr, "Error: Validation failure in grid integrate\n");
     357            0 :         fprintf(stderr, "   hab diff:     %le\n", diff);
     358            0 :         fprintf(stderr, "   hab rel_diff: %le\n", rel_diff);
     359            0 :         fprintf(stderr, "   hab value:    %le\n", ref_value);
     360            0 :         fprintf(stderr, "   hab i:        %i\n", i);
     361            0 :         abort();
     362              :       }
     363              :     }
     364              : 
     365              :     // Compare forces.
     366           26 :     const double forces_tolerance = 1e-8; // account for higher numeric noise
     367           26 :     double forces_max_rel_diff = 0.0;
     368           26 :     if (forces != NULL) {
     369           14 :       for (int iatom = 0; iatom < natoms; iatom++) {
     370           40 :         for (int idir = 0; idir < 3; idir++) {
     371           30 :           const double ref_value = forces_ref[iatom][idir];
     372           30 :           const double test_value = forces[iatom][idir];
     373           30 :           const double diff = fabs(test_value - ref_value);
     374           30 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     375           30 :           forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
     376           30 :           if (rel_diff > forces_tolerance) {
     377            0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     378            0 :             fprintf(stderr, "   forces diff:     %le\n", diff);
     379            0 :             fprintf(stderr, "   forces rel_diff: %le\n", rel_diff);
     380            0 :             fprintf(stderr, "   forces value:    %le\n", ref_value);
     381            0 :             fprintf(stderr, "   forces atom:     %i\n", iatom);
     382            0 :             fprintf(stderr, "   forces dir:      %i\n", idir);
     383            0 :             abort();
     384              :           }
     385              :         }
     386              :       }
     387              :     }
     388              : 
     389              :     // Compare virial.
     390           26 :     const double virial_tolerance = 1e-8; // account for higher numeric noise
     391           26 :     double virial_max_rel_diff = 0.0;
     392           26 :     if (virial != NULL) {
     393            0 :       for (int i = 0; i < 3; i++) {
     394            0 :         for (int j = 0; j < 3; j++) {
     395            0 :           const double ref_value = virial_ref[i][j];
     396            0 :           const double test_value = virial[i][j];
     397            0 :           const double diff = fabs(test_value - ref_value);
     398            0 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     399            0 :           virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
     400            0 :           if (rel_diff > virial_tolerance) {
     401            0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     402            0 :             fprintf(stderr, "   virial diff:     %le\n", diff);
     403            0 :             fprintf(stderr, "   virial rel_diff: %le\n", rel_diff);
     404            0 :             fprintf(stderr, "   virial value:    %le\n", ref_value);
     405            0 :             fprintf(stderr, "   virial ij:       %i  %i\n", i, j);
     406            0 :             abort();
     407              :           }
     408              :         }
     409              :       }
     410              :     }
     411              : 
     412           26 :     printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
     413              :            hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
     414           26 :     offload_free_buffer(hab_blocks_ref);
     415              :   }
     416       190844 : }
     417              : 
     418              : // EOF
        

Generated by: LCOV version 2.0-1