LCOV - CP2K Regtests (git:3158929) - src/grid/grid_task

LCOV - code coverage report

Current view:	top level - src/grid - grid_task_list.c (source / functions)		Hit	Total	Coverage
Test:	CP2K Regtests (git:3158929)	Lines:	134	185	72.4 %
Date:	2025-07-22 22:41:15	Functions:	4	4	100.0 %

          Line data    Source code

       1             : /*----------------------------------------------------------------------------*/
       2             : /*  CP2K: A general program to perform molecular dynamics simulations         */
       3             : /*  Copyright 2000-2025 CP2K developers group <https://cp2k.org>              */
       4             : /*                                                                            */
       5             : /*  SPDX-License-Identifier: BSD-3-Clause                                     */
       6             : /*----------------------------------------------------------------------------*/
       7             : 
       8             : #include <assert.h>
       9             : #include <math.h>
      10             : #include <stddef.h>
      11             : #include <stdio.h>
      12             : #include <stdlib.h>
      13             : #include <string.h>
      14             : 
      15             : #include "common/grid_common.h"
      16             : #include "common/grid_constants.h"
      17             : #include "common/grid_library.h"
      18             : #include "grid_task_list.h"
      19             : 
      20             : /*******************************************************************************
      21             :  * \brief Allocates a task list which can be passed to grid_collocate_task_list.
      22             :  *        See grid_task_list.h for details.
      23             :  * \author Ole Schuett
      24             :  ******************************************************************************/
      25       13752 : void grid_create_task_list(
      26             :     const bool orthorhombic, const int ntasks, const int nlevels,
      27             :     const int natoms, const int nkinds, const int nblocks,
      28             :     const int block_offsets[nblocks], const double atom_positions[natoms][3],
      29             :     const int atom_kinds[natoms], const grid_basis_set *basis_sets[nkinds],
      30             :     const int level_list[ntasks], const int iatom_list[ntasks],
      31             :     const int jatom_list[ntasks], const int iset_list[ntasks],
      32             :     const int jset_list[ntasks], const int ipgf_list[ntasks],
      33             :     const int jpgf_list[ntasks], const int border_mask_list[ntasks],
      34             :     const int block_num_list[ntasks], const double radius_list[ntasks],
      35             :     const double rab_list[ntasks][3], const int npts_global[nlevels][3],
      36             :     const int npts_local[nlevels][3], const int shift_local[nlevels][3],
      37             :     const int border_width[nlevels][3], const double dh[nlevels][3][3],
      38             :     const double dh_inv[nlevels][3][3], grid_task_list **task_list_out) {
      39             : 
      40       13752 :   const grid_library_config config = grid_library_get_config();
      41             : 
      42       13752 :   grid_task_list *task_list = NULL;
      43             : 
      44       13752 :   if (*task_list_out == NULL) {
      45        8172 :     task_list = malloc(sizeof(grid_task_list));
      46        8172 :     assert(task_list != NULL);
      47        8172 :     memset(task_list, 0, sizeof(grid_task_list));
      48             : 
      49             :     // Resolve AUTO to a concrete backend.
      50        8172 :     if (config.backend == GRID_BACKEND_AUTO) {
      51             : 
      52             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
      53             :       task_list->backend = GRID_BACKEND_HIP;
      54             : #elif defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
      55             :       task_list->backend = GRID_BACKEND_GPU;
      56             : #else
      57        8152 :       task_list->backend = GRID_BACKEND_CPU;
      58             : #endif
      59             :     } else {
      60          20 :       task_list->backend = config.backend;
      61             :     }
      62             :   } else {
      63             :     // Reuse existing task list.
      64        5580 :     task_list = *task_list_out;
      65        5580 :     free(task_list->npts_local);
      66             :   }
      67             : 
      68             :   // Store npts_local for bounds checking and validation.
      69       13752 :   task_list->nlevels = nlevels;
      70       13752 :   size_t size = nlevels * 3 * sizeof(int);
      71       13752 :   task_list->npts_local = malloc(size);
      72       13752 :   assert(task_list->npts_local != NULL);
      73       13752 :   memcpy(task_list->npts_local, npts_local, size);
      74             : 
      75             :   // Always create reference backend because it might be needed for validation.
      76       13752 :   grid_ref_create_task_list(
      77             :       orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      78             :       atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      79             :       jatom_list, iset_list, jset_list, ipgf_list, jpgf_list, border_mask_list,
      80             :       block_num_list, radius_list, rab_list, npts_global, npts_local,
      81             :       shift_local, border_width, dh, dh_inv, &task_list->ref);
      82             : 
      83             :   // Create other backend, if selected.
      84       13752 :   switch (task_list->backend) {
      85             :   case GRID_BACKEND_REF:
      86             :     break; // was already created above
      87       13728 :   case GRID_BACKEND_CPU:
      88       13728 :     grid_cpu_create_task_list(
      89             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      90             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      91             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
      92             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
      93             :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->cpu);
      94       13728 :     break;
      95          20 :   case GRID_BACKEND_DGEMM:
      96          20 :     grid_dgemm_create_task_list(
      97             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
      98             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
      99             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     100             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     101             :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->dgemm);
     102          20 :     break;
     103             : 
     104           0 :   case GRID_BACKEND_GPU:
     105             : #if (defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID))
     106             :     grid_gpu_create_task_list(
     107             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     108             :         atom_positions, atom_kinds, basis_sets, level_list, iatom_list,
     109             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     110             :         border_mask_list, block_num_list, radius_list, rab_list, npts_global,
     111             :         npts_local, shift_local, border_width, dh, dh_inv, &task_list->gpu);
     112             : #else
     113           0 :     fprintf(stderr,
     114             :             "Error: The GPU grid backend is not available. "
     115             :             "Please re-compile with -D__OFFLOAD_CUDA or -D__OFFLOAD_HIP");
     116           0 :     abort();
     117             : #endif
     118           0 :     break;
     119             : 
     120           0 :   case GRID_BACKEND_HIP:
     121             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     122             :     grid_hip_create_task_list(
     123             :         orthorhombic, ntasks, nlevels, natoms, nkinds, nblocks, block_offsets,
     124             :         &atom_positions[0][0], atom_kinds, basis_sets, level_list, iatom_list,
     125             :         jatom_list, iset_list, jset_list, ipgf_list, jpgf_list,
     126             :         border_mask_list, block_num_list, radius_list, &rab_list[0][0],
     127             :         &npts_global[0][0], &npts_local[0][0], &shift_local[0][0],
     128             :         &border_width[0][0], &dh[0][0][0], &dh_inv[0][0][0], &task_list->hip);
     129             : #else
     130           0 :     fprintf(stderr, "Error: The HIP grid backend is not available. "
     131             :                     "Please re-compile with -D__OFFLOAD_HIP");
     132           0 :     abort();
     133             : #endif
     134           0 :     break;
     135             : 
     136           0 :   default:
     137           0 :     printf("Error: Unknown grid backend: %i.\n", config.backend);
     138           0 :     abort();
     139       13752 :     break;
     140             :   }
     141             : 
     142       13752 :   *task_list_out = task_list;
     143       13752 : }
     144             : 
     145             : /*******************************************************************************
     146             :  * \brief Deallocates given task list, basis_sets have to be freed separately.
     147             :  * \author Ole Schuett
     148             :  ******************************************************************************/
     149        8172 : void grid_free_task_list(grid_task_list *task_list) {
     150             : 
     151        8172 :   if (task_list->ref != NULL) {
     152        8172 :     grid_ref_free_task_list(task_list->ref);
     153        8172 :     task_list->ref = NULL;
     154             :   }
     155        8172 :   if (task_list->cpu != NULL) {
     156        8160 :     grid_cpu_free_task_list(task_list->cpu);
     157        8160 :     task_list->cpu = NULL;
     158             :   }
     159        8172 :   if (task_list->dgemm != NULL) {
     160           8 :     grid_dgemm_free_task_list(task_list->dgemm);
     161           8 :     task_list->dgemm = NULL;
     162             :   }
     163             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     164             :   if (task_list->gpu != NULL) {
     165             :     grid_gpu_free_task_list(task_list->gpu);
     166             :     task_list->gpu = NULL;
     167             :   }
     168             : #endif
     169             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     170             :   if (task_list->hip != NULL) {
     171             :     grid_hip_free_task_list(task_list->hip);
     172             :     task_list->hip = NULL;
     173             :   }
     174             : #endif
     175             : 
     176        8172 :   free(task_list->npts_local);
     177        8172 :   free(task_list);
     178        8172 : }
     179             : 
     180             : /*******************************************************************************
     181             :  * \brief Collocate all tasks of in given list onto given grids.
     182             :  *        See grid_task_list.h for details.
     183             :  * \author Ole Schuett
     184             :  ******************************************************************************/
     185      201654 : void grid_collocate_task_list(const grid_task_list *task_list,
     186             :                               const enum grid_func func, const int nlevels,
     187             :                               const int npts_local[nlevels][3],
     188             :                               const offload_buffer *pab_blocks,
     189             :                               offload_buffer *grids[nlevels]) {
     190             : 
     191             :   // Bounds check.
     192      201654 :   assert(task_list->nlevels == nlevels);
     193      999470 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     194      797816 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     195      797816 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     196      797816 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     197             :   }
     198             : 
     199      201654 :   switch (task_list->backend) {
     200          30 :   case GRID_BACKEND_REF:
     201          30 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     202             :                                  grids);
     203          30 :     break;
     204      201462 :   case GRID_BACKEND_CPU:
     205      201462 :     grid_cpu_collocate_task_list(task_list->cpu, func, nlevels, pab_blocks,
     206             :                                  grids);
     207      201462 :     break;
     208         162 :   case GRID_BACKEND_DGEMM:
     209         162 :     grid_dgemm_collocate_task_list(task_list->dgemm, func, nlevels, pab_blocks,
     210             :                                    grids);
     211         162 :     break;
     212             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     213             :   case GRID_BACKEND_GPU:
     214             :     grid_gpu_collocate_task_list(task_list->gpu, func, nlevels, pab_blocks,
     215             :                                  grids);
     216             :     break;
     217             : #endif
     218             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     219             :   case GRID_BACKEND_HIP:
     220             :     grid_hip_collocate_task_list(task_list->hip, func, nlevels, pab_blocks,
     221             :                                  grids);
     222             :     break;
     223             : #endif
     224           0 :   default:
     225           0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     226           0 :     abort();
     227      201654 :     break;
     228             :   }
     229             : 
     230             :   // Perform validation if enabled.
     231      201654 :   if (grid_library_get_config().validate) {
     232             :     // Allocate space for reference results.
     233          30 :     offload_buffer *grids_ref[nlevels];
     234         150 :     for (int level = 0; level < nlevels; level++) {
     235         120 :       const int npts_local_total =
     236         120 :           npts_local[level][0] * npts_local[level][1] * npts_local[level][2];
     237         120 :       grids_ref[level] = NULL;
     238         120 :       offload_create_buffer(npts_local_total, &grids_ref[level]);
     239             :     }
     240             : 
     241             :     // Call reference implementation.
     242          30 :     grid_ref_collocate_task_list(task_list->ref, func, nlevels, pab_blocks,
     243             :                                  grids_ref);
     244             : 
     245             :     // Compare results.
     246          30 :     const double tolerance = 1e-12;
     247          30 :     double max_rel_diff = 0.0;
     248         150 :     for (int level = 0; level < nlevels; level++) {
     249        2526 :       for (int i = 0; i < npts_local[level][0]; i++) {
     250       82680 :         for (int j = 0; j < npts_local[level][1]; j++) {
     251     3671568 :           for (int k = 0; k < npts_local[level][2]; k++) {
     252     3591294 :             const int idx = k * npts_local[level][1] * npts_local[level][0] +
     253     3591294 :                             j * npts_local[level][0] + i;
     254     3591294 :             const double ref_value = grids_ref[level]->host_buffer[idx];
     255     3591294 :             const double test_value = grids[level]->host_buffer[idx];
     256     3591294 :             const double diff = fabs(test_value - ref_value);
     257     3591294 :             const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     258     3591294 :             max_rel_diff = fmax(max_rel_diff, rel_diff);
     259     3591294 :             if (rel_diff > tolerance) {
     260           0 :               fprintf(stderr, "Error: Validation failure in grid collocate\n");
     261           0 :               fprintf(stderr, "   diff:     %le\n", diff);
     262           0 :               fprintf(stderr, "   rel_diff: %le\n", rel_diff);
     263           0 :               fprintf(stderr, "   value:    %le\n", ref_value);
     264           0 :               fprintf(stderr, "   level:    %i\n", level);
     265           0 :               fprintf(stderr, "   ijk:      %i  %i  %i\n", i, j, k);
     266           0 :               abort();
     267             :             }
     268             :           }
     269             :         }
     270             :       }
     271         120 :       offload_free_buffer(grids_ref[level]);
     272         120 :       printf("Validated grid collocate, max rel. diff: %le\n", max_rel_diff);
     273             :     }
     274             :   }
     275      201654 : }
     276             : 
     277             : /*******************************************************************************
     278             :  * \brief Integrate all tasks of in given list from given grids.
     279             :  *        See grid_task_list.h for details.
     280             :  * \author Ole Schuett
     281             :  ******************************************************************************/
     282      185848 : void grid_integrate_task_list(
     283             :     const grid_task_list *task_list, const bool compute_tau, const int natoms,
     284             :     const int nlevels, const int npts_local[nlevels][3],
     285             :     const offload_buffer *pab_blocks, const offload_buffer *grids[nlevels],
     286             :     offload_buffer *hab_blocks, double forces[natoms][3], double virial[3][3]) {
     287             : 
     288             :   // Bounds check.
     289      185848 :   assert(task_list->nlevels == nlevels);
     290      921234 :   for (int ilevel = 0; ilevel < nlevels; ilevel++) {
     291      735386 :     assert(task_list->npts_local[ilevel][0] == npts_local[ilevel][0]);
     292      735386 :     assert(task_list->npts_local[ilevel][1] == npts_local[ilevel][1]);
     293      735386 :     assert(task_list->npts_local[ilevel][2] == npts_local[ilevel][2]);
     294             :   }
     295             : 
     296      185848 :   assert(forces == NULL || pab_blocks != NULL);
     297      185848 :   assert(virial == NULL || pab_blocks != NULL);
     298             : 
     299      185848 :   switch (task_list->backend) {
     300             : #if defined(__OFFLOAD) && !defined(__NO_OFFLOAD_GRID)
     301             :   case GRID_BACKEND_GPU:
     302             :     grid_gpu_integrate_task_list(task_list->gpu, compute_tau, natoms, nlevels,
     303             :                                  pab_blocks, grids, hab_blocks, forces, virial);
     304             :     break;
     305             : #endif
     306             : #if defined(__OFFLOAD_HIP) && !defined(__NO_OFFLOAD_GRID)
     307             :   case GRID_BACKEND_HIP:
     308             :     grid_hip_integrate_task_list(task_list->hip, compute_tau, nlevels,
     309             :                                  pab_blocks, grids, hab_blocks, &forces[0][0],
     310             :                                  &virial[0][0]);
     311             :     break;
     312             : #endif
     313         158 :   case GRID_BACKEND_DGEMM:
     314         158 :     grid_dgemm_integrate_task_list(task_list->dgemm, compute_tau, natoms,
     315             :                                    nlevels, pab_blocks, grids, hab_blocks,
     316             :                                    forces, virial);
     317         158 :     break;
     318      185664 :   case GRID_BACKEND_CPU:
     319      185664 :     grid_cpu_integrate_task_list(task_list->cpu, compute_tau, natoms, nlevels,
     320             :                                  pab_blocks, grids, hab_blocks, forces, virial);
     321      185664 :     break;
     322          26 :   case GRID_BACKEND_REF:
     323          26 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     324             :                                  pab_blocks, grids, hab_blocks, forces, virial);
     325          26 :     break;
     326           0 :   default:
     327           0 :     printf("Error: Unknown grid backend: %i.\n", task_list->backend);
     328           0 :     abort();
     329      185848 :     break;
     330             :   }
     331             : 
     332             :   // Perform validation if enabled.
     333      185848 :   if (grid_library_get_config().validate) {
     334             :     // Allocate space for reference results.
     335          26 :     const int hab_length = hab_blocks->size / sizeof(double);
     336          26 :     offload_buffer *hab_blocks_ref = NULL;
     337          26 :     offload_create_buffer(hab_length, &hab_blocks_ref);
     338          26 :     double forces_ref[natoms][3], virial_ref[3][3];
     339             : 
     340             :     // Call reference implementation.
     341          52 :     grid_ref_integrate_task_list(task_list->ref, compute_tau, natoms, nlevels,
     342             :                                  pab_blocks, grids, hab_blocks_ref,
     343             :                                  (forces != NULL) ? forces_ref : NULL,
     344             :                                  (virial != NULL) ? virial_ref : NULL);
     345             : 
     346             :     // Compare hab.
     347          26 :     const double hab_tolerance = 1e-12;
     348          26 :     double hab_max_rel_diff = 0.0;
     349        2187 :     for (int i = 0; i < hab_length; i++) {
     350        2161 :       const double ref_value = hab_blocks_ref->host_buffer[i];
     351        2161 :       const double test_value = hab_blocks->host_buffer[i];
     352        2161 :       const double diff = fabs(test_value - ref_value);
     353        2161 :       const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     354        2161 :       hab_max_rel_diff = fmax(hab_max_rel_diff, rel_diff);
     355        2161 :       if (rel_diff > hab_tolerance) {
     356           0 :         fprintf(stderr, "Error: Validation failure in grid integrate\n");
     357           0 :         fprintf(stderr, "   hab diff:     %le\n", diff);
     358           0 :         fprintf(stderr, "   hab rel_diff: %le\n", rel_diff);
     359           0 :         fprintf(stderr, "   hab value:    %le\n", ref_value);
     360           0 :         fprintf(stderr, "   hab i:        %i\n", i);
     361           0 :         abort();
     362             :       }
     363             :     }
     364             : 
     365             :     // Compare forces.
     366          26 :     const double forces_tolerance = 1e-8; // account for higher numeric noise
     367          26 :     double forces_max_rel_diff = 0.0;
     368          26 :     if (forces != NULL) {
     369          14 :       for (int iatom = 0; iatom < natoms; iatom++) {
     370          40 :         for (int idir = 0; idir < 3; idir++) {
     371          30 :           const double ref_value = forces_ref[iatom][idir];
     372          30 :           const double test_value = forces[iatom][idir];
     373          30 :           const double diff = fabs(test_value - ref_value);
     374          30 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     375          30 :           forces_max_rel_diff = fmax(forces_max_rel_diff, rel_diff);
     376          30 :           if (rel_diff > forces_tolerance) {
     377           0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     378           0 :             fprintf(stderr, "   forces diff:     %le\n", diff);
     379           0 :             fprintf(stderr, "   forces rel_diff: %le\n", rel_diff);
     380           0 :             fprintf(stderr, "   forces value:    %le\n", ref_value);
     381           0 :             fprintf(stderr, "   forces atom:     %i\n", iatom);
     382           0 :             fprintf(stderr, "   forces dir:      %i\n", idir);
     383           0 :             abort();
     384             :           }
     385             :         }
     386             :       }
     387             :     }
     388             : 
     389             :     // Compare virial.
     390          26 :     const double virial_tolerance = 1e-8; // account for higher numeric noise
     391          26 :     double virial_max_rel_diff = 0.0;
     392          26 :     if (virial != NULL) {
     393           0 :       for (int i = 0; i < 3; i++) {
     394           0 :         for (int j = 0; j < 3; j++) {
     395           0 :           const double ref_value = virial_ref[i][j];
     396           0 :           const double test_value = virial[i][j];
     397           0 :           const double diff = fabs(test_value - ref_value);
     398           0 :           const double rel_diff = diff / fmax(1.0, fabs(ref_value));
     399           0 :           virial_max_rel_diff = fmax(virial_max_rel_diff, rel_diff);
     400           0 :           if (rel_diff > virial_tolerance) {
     401           0 :             fprintf(stderr, "Error: Validation failure in grid integrate\n");
     402           0 :             fprintf(stderr, "   virial diff:     %le\n", diff);
     403           0 :             fprintf(stderr, "   virial rel_diff: %le\n", rel_diff);
     404           0 :             fprintf(stderr, "   virial value:    %le\n", ref_value);
     405           0 :             fprintf(stderr, "   virial ij:       %i  %i\n", i, j);
     406           0 :             abort();
     407             :           }
     408             :         }
     409             :       }
     410             :     }
     411             : 
     412          26 :     printf("Validated grid_integrate, max rel. diff: %le %le %le\n",
     413             :            hab_max_rel_diff, forces_max_rel_diff, virial_max_rel_diff);
     414          26 :     offload_free_buffer(hab_blocks_ref);
     415             :   }
     416      185848 : }
     417             : 
     418             : // EOF

Generated by: LCOV version 1.15